In [1]:
import pandas as pd

# Load the raw dataset. The file is semicolon-delimited, so with the default
# comma separator each row is read as one string column whose header is the
# whole 'temperature;exhaust_vacuum;...' line; the cleaning cells split it.
# NOTE(review): passing sep=';' here would likely make the split/drop cells
# unnecessary - confirm against the file before changing.
df = pd.read_csv('combined_cycle_power_plant (1).csv')

### Data Cleaning

In [2]:
# Split the single semicolon-joined column into the five real fields.
# `expand` must be passed by keyword: pandas >= 2.0 makes every argument after
# `pat` keyword-only, so the old positional form `split(';', 5, expand=True)`
# raises TypeError. No split limit is needed because each row holds five fields.
df[['Temperature','Exhaust Vacuum','Ambient Pressure','Relative Humidity','Energy Output']] = df['temperature;exhaust_vacuum;ambient_pressure;relative_humidity;energy_output'].str.split(';', expand=True)

In [3]:
# The combined raw column has been split into named fields, so remove it.
df = df.drop(columns=['temperature;exhaust_vacuum;ambient_pressure;relative_humidity;energy_output'])

In [4]:
# The split produced string values; convert every column to 64-bit floats.
df = df.astype('float64')

In [5]:
# Count missing values per column to confirm the cleaned frame has no gaps.
df.isnull().sum()

Temperature          0
Exhaust Vacuum       0
Ambient Pressure     0
Relative Humidity    0
Energy Output        0
dtype: int64

### Data Analysis

In [6]:
# Summary statistics for every column: count, mean, standard deviation,
# minimum, quartiles and maximum.
df.describe()

Unnamed: 0,Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,Energy Output
count,9568.0,9568.0,9568.0,9568.0,9568.0
mean,19.651231,54.305804,1013.259078,73.308978,454.365009
std,7.452473,12.707893,5.938784,14.600269,17.066995
min,1.81,25.36,992.89,25.56,420.26
25%,13.51,41.74,1009.1,63.3275,439.75
50%,20.345,52.08,1012.94,74.975,451.55
75%,25.72,66.54,1017.26,84.83,468.43
max,37.11,81.56,1033.3,100.16,495.76


In [7]:
# Pairwise correlation coefficients between all columns (pandas defaults to
# Pearson correlation).
df.corr()

Unnamed: 0,Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,Energy Output
Temperature,1.0,0.844107,-0.507549,-0.542535,-0.948128
Exhaust Vacuum,0.844107,1.0,-0.413502,-0.312187,-0.86978
Ambient Pressure,-0.507549,-0.413502,1.0,0.099574,0.518429
Relative Humidity,-0.542535,-0.312187,0.099574,1.0,0.389794
Energy Output,-0.948128,-0.86978,0.518429,0.389794,1.0


In [8]:
# From the correlation table above, temperature is strongly positively correlated with exhaust vacuum (0.84)
# and strongly negatively correlated with energy output (-0.95). The strong negative correlation between
# exhaust vacuum and energy output (-0.87) may partly reflect the strong positive correlation between
# temperature and exhaust vacuum rather than an independent effect.

# Energy output also has a moderate positive correlation with ambient pressure (0.52) and a weaker positive
# correlation with relative humidity (0.39).

In [9]:
# Report how far the extremes sit from the mean, as fractional deviations
# (value / mean - 1), for energy output and for temperature.
energy_output = df['Energy Output']
temperature = df['Temperature']
mean_energy = energy_output.mean()
mean_temperature = temperature.mean()

print("Max Energy: ")
print(energy_output.max() / mean_energy - 1)
print("Min Energy: ")
print(energy_output.min() / mean_energy - 1)
print()

# Absolute fractional deviation of the hottest and coldest readings from the
# mean temperature.
print("Average Temp: " + str(mean_temperature))
print(abs(temperature.max() / mean_temperature - 1))
print(abs(temperature.min() / mean_temperature - 1))
print()

# idxmax()/idxmin() return index *labels*, so the matching row must be looked
# up with .loc; .iloc only gave the right row while the index was 0..n-1.
print("Highest Temp: " + str(temperature.max()))
print(df.loc[temperature.idxmax(), 'Energy Output'] / mean_energy - 1)
print()

print("Lowest Temp: " + str(temperature.min()))
print(df.loc[temperature.idxmin(), 'Energy Output'] / mean_energy - 1)
print()

Max Energy: 
0.09110514616371779
Min Energy: 
-0.07506081828553324

Average Temp: 19.651231187290936
0.888431297068053
0.9078938117032289

Highest Temp: 37.11
-0.05527496371071505

Lowest Temp: 1.81
0.07963859417986896



In [10]:
# The maximum energy output is 9.1% higher than the average.
# The minimum energy output is 7.5% lower than the average.

# The energy output at the highest temperature is 5.5% lower than the average.
# The energy output at the lowest temperature is 8.0% higher than the average.

In [11]:
# Rank rows from highest to lowest energy output. The frame is deliberately
# not named `sorted`, which would shadow the built-in sorted() for every
# later cell in the kernel session.
df_by_energy = df.sort_values(by='Energy Output', ascending=False)

# Positional cutoffs at 25%, 50% and 75% of the ranked rows (integer
# arithmetic, so they can be used directly as slice bounds).
top_25 = len(df) // 4
mid_50 = len(df) // 2
lower_25 = (len(df) * 3) // 4

# Highest-output quarter of the rows.
top = df_by_energy.iloc[:top_25]
# NOTE(review): `mid` covers only the 50%-75% band of the ranking; rows
# between the 25% and 50% cutoffs fall into none of the three groups.
# Confirm whether `df_by_energy.iloc[top_25:lower_25]` (the middle 50%) was
# intended before relying on the `mid` statistics.
mid = df_by_energy.iloc[mid_50:lower_25]
# Lowest-output quarter of the rows.
lower = df_by_energy.iloc[lower_25:]

top.corr()

Unnamed: 0,Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,Energy Output
Temperature,1.0,0.176202,-0.085802,-0.279095,-0.804691
Exhaust Vacuum,0.176202,1.0,0.029044,0.064017,-0.182664
Ambient Pressure,-0.085802,0.029044,1.0,-0.158405,0.063284
Relative Humidity,-0.279095,0.064017,-0.158405,1.0,0.080394
Energy Output,-0.804691,-0.182664,0.063284,0.080394,1.0


In [14]:
# Column means for `top`, the highest-energy-output quarter of the rows.
top.mean()

Temperature            10.067492
Exhaust Vacuum         40.718215
Ambient Pressure     1017.100773
Relative Humidity      80.958023
Energy Output         477.745982
dtype: float64

In [17]:
# Pairwise correlations within `mid`, i.e. the rows between the 50% and 75%
# cutoffs of the descending energy-output ranking.
mid.corr()

Unnamed: 0,Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,Energy Output
Temperature,1.0,0.251386,-0.059887,-0.668119,-0.526098
Exhaust Vacuum,0.251386,1.0,0.169664,-0.046473,-0.289169
Ambient Pressure,-0.059887,0.169664,1.0,-0.165979,0.192873
Relative Humidity,-0.668119,-0.046473,-0.165979,1.0,0.168927
Energy Output,-0.526098,-0.289169,0.192873,0.168927,1.0


In [16]:
# Column means for `mid`, i.e. the rows between the 50% and 75% cutoffs of
# the descending energy-output ranking.
mid.mean()

Temperature            23.560180
Exhaust Vacuum         60.318846
Ambient Pressure     1012.011162
Relative Humidity      70.895201
Energy Output         445.116584
dtype: float64

In [18]:
# Pairwise correlations within `lower`, the lowest-energy-output quarter of
# the rows.
lower.corr()

Unnamed: 0,Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,Energy Output
Temperature,1.0,0.205723,-0.128912,-0.665794,-0.399863
Exhaust Vacuum,0.205723,1.0,-0.042188,0.163341,-0.304651
Ambient Pressure,-0.128912,-0.042188,1.0,-0.138716,0.205633
Relative Humidity,-0.665794,0.163341,-0.138716,1.0,0.118138
Energy Output,-0.399863,-0.304651,0.205633,0.118138,1.0


In [19]:
# Column means for `lower`, the lowest-energy-output quarter of the rows.
lower.mean()

Temperature            28.094941
Exhaust Vacuum         69.092128
Ambient Pressure     1008.956639
Relative Humidity      66.011881
Energy Output         434.170577
dtype: float64