# Bonus: Temperature Analysis I

In [61]:
import pandas as pd
from datetime import datetime as dt

In [62]:
# "tobs" is "temperature observations"
measurements_csv = 'Resources/hawaii_measurements.csv'
df = pd.read_csv(measurements_csv)
# Preview the first rows to confirm the file loaded
df.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [63]:
# Parse the string dates into datetime values, then store them back
# in the same column so later cells can use datetime accessors.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates
df

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.00,63
2,USC00519397,2010-01-03,0.00,74
3,USC00519397,2010-01-04,0.00,76
4,USC00519397,2010-01-06,,73
...,...,...,...,...
19545,USC00516128,2017-08-19,0.09,71
19546,USC00516128,2017-08-20,,78
19547,USC00516128,2017-08-21,0.56,76
19548,USC00516128,2017-08-22,0.50,76


In [64]:
# Index the rows by date. drop=False keeps 'date' as a regular column
# as well, so at this point the dates appear in both places.
updated_df = df.set_index(keys='date', drop=False)
updated_df.head()

Unnamed: 0_level_0,station,date,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01,USC00519397,2010-01-01,0.08,65
2010-01-02,USC00519397,2010-01-02,0.0,63
2010-01-03,USC00519397,2010-01-03,0.0,74
2010-01-04,USC00519397,2010-01-04,0.0,76
2010-01-06,USC00519397,2010-01-06,,73


In [65]:
# Drop the date column (the dates remain available as the index).
# Rebinding the name instead of using inplace=True, together with
# errors='ignore', keeps this cell safe to re-run: a second execution
# finds no 'date' column and is a no-op instead of raising KeyError.
updated_df = updated_df.drop(columns=['date'], errors='ignore')
updated_df.head()

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.0,63
2010-01-03,USC00519397,0.0,74
2010-01-04,USC00519397,0.0,76
2010-01-06,USC00519397,,73


### Compare June and December data across all years 

In [66]:
from scipy import stats

In [70]:
# Keep only the rows whose index date falls in June (month number 6)
is_june = updated_df.index.month == 6
june_df = updated_df[is_june]
june_df.head()

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-06-01,USC00519397,0.0,78
2010-06-02,USC00519397,0.01,76
2010-06-03,USC00519397,0.0,78
2010-06-04,USC00519397,0.0,76
2010-06-05,USC00519397,0.0,77


In [78]:
# Identify the average temperature for June, per station.
# 'tobs' is selected before aggregating so only that column is averaged;
# averaging the whole frame first does extra work and raises under
# pandas 2.x if any non-numeric column is present.
avg_temp_june = june_df.groupby('station')['tobs'].mean()
avg_temp_june

station
USC00511918    74.139394
USC00513117    74.050847
USC00514830    76.005376
USC00516128    71.937220
USC00517948    76.655405
USC00518838    73.394737
USC00519281    73.271186
USC00519397    77.559322
USC00519523    76.668103
Name: tobs, dtype: float64

In [77]:
# Identify the average temperature for December, per station.
# Keep only the rows whose index date falls in December (month number 12).
dec_df = updated_df[updated_df.index.month == 12]
# 'tobs' is selected before aggregating so only that column is averaged;
# averaging the whole frame first does extra work and raises under
# pandas 2.x if any non-numeric column is present.
avg_temp_dec = dec_df.groupby('station')['tobs'].mean()
avg_temp_dec

station
USC00511918    69.684211
USC00513117    71.069444
USC00514830    73.224719
USC00516128    69.291262
USC00517948    71.834862
USC00518838    72.421053
USC00519281    69.903226
USC00519397    71.109524
USC00519523    72.433333
Name: tobs, dtype: float64

In [79]:
# Place the per-station June and December means side by side,
# one row per station, so the two months can be compared as pairs.
station_means_by_month = {
    "Avg. Temp June": avg_temp_june,
    "Avg. Temp Dec": avg_temp_dec,
}
temp_df = pd.DataFrame(station_means_by_month)
temp_df

Unnamed: 0_level_0,Avg. Temp June,Avg. Temp Dec
station,Unnamed: 1_level_1,Unnamed: 2_level_1
USC00511918,74.139394,69.684211
USC00513117,74.050847,71.069444
USC00514830,76.005376,73.224719
USC00516128,71.93722,69.291262
USC00517948,76.655405,71.834862
USC00518838,73.394737,72.421053
USC00519281,73.271186,69.903226
USC00519397,77.559322,71.109524
USC00519523,76.668103,72.433333


In [80]:
# Paired t-test on the two columns of station means (row i of each
# column belongs to the same station).
june_means = temp_df['Avg. Temp June']
dec_means = temp_df['Avg. Temp Dec']
stats.ttest_rel(june_means, dec_means)

Ttest_relResult(statistic=6.95696617044294, pvalue=0.00011759380231523222)

### Analysis

The analysis uses a paired t-test because the two samples are matched: each of the nine stations contributes one average temperature for June and one for December, so the observations come from the same subjects and are separated only by time of year. The difference between the average temperatures in June and those in December is statistically significant, with a t-statistic of 6.96. The p-value of 0.00012 is strong evidence against the null hypothesis, which states that the mean difference between the stations' average June and December temperatures is zero.