In [1]:
# Importing dependencies 
import pandas as pd
from datetime import datetime as dt

In [2]:
# "tobs" is "temperature observations"
hawaii_df = pd.read_csv('./Resources/hawaii_measurements.csv')
hawaii_df.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [3]:
# Convert the date column format from string to datetime
hawaii_df.date = pd.to_datetime(hawaii_df.date, infer_datetime_format=True)

In [4]:
# Set the date column as the DataFrame index
hawaii_df.set_index(['date'])

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.00,63
2010-01-03,USC00519397,0.00,74
2010-01-04,USC00519397,0.00,76
2010-01-06,USC00519397,,73
...,...,...,...
2017-08-19,USC00516128,0.09,71
2017-08-20,USC00516128,,78
2017-08-21,USC00516128,0.56,76
2017-08-22,USC00516128,0.50,76


In [5]:
from scipy import stats

In [6]:
# Filter data for desired months
june_df = hawaii_df[hawaii_df['date'].dt.month_name() == "June"]
dec_df = hawaii_df[hawaii_df['date'].dt.month_name() == "December"]
combined_months = pd.concat([june_df,dec_df])
combined_months

Unnamed: 0,station,date,prcp,tobs
133,USC00519397,2010-06-01,0.00,78
134,USC00519397,2010-06-02,0.01,76
135,USC00519397,2010-06-03,0.00,78
136,USC00519397,2010-06-04,0.00,76
137,USC00519397,2010-06-05,0.00,77
...,...,...,...,...
19323,USC00516128,2016-12-27,0.14,71
19324,USC00516128,2016-12-28,0.14,71
19325,USC00516128,2016-12-29,1.03,69
19326,USC00516128,2016-12-30,2.37,65


In [7]:
 # Identify the average temperature for June
june_mean = june_df["tobs"].mean
print(f'The Average Temp for June is: {june_mean()}')

The Average Temp for June is: 74.94411764705882


In [8]:
 # Identify the average temperature for December
dec_mean = dec_df["tobs"].mean
print(f'The Average Temp for December is: {dec_mean()}')

The Average Temp for December is: 71.04152933421226


In [24]:
 # Create collections of temperature data
june_temp_new = combined_months.tobs
dec_temp_new = combined_months.tobs
combined_temp = pd.concat([june_temp_new,dec_temp_new])
combined_temp

133      78
134      76
135      78
136      76
137      77
         ..
19323    71
19324    71
19325    69
19326    65
19327    65
Name: tobs, Length: 6434, dtype: int64

In [27]:
 # Create collections of temperature data for T-test
june_temp = june_df.tobs
dec_temp = dec_df.tobs

In [28]:
combined_temp.describe()

count    6434.000000
mean       73.103823
std         4.001761
min        56.000000
25%        71.000000
50%        73.000000
75%        76.000000
max        85.000000
Name: tobs, dtype: float64

In [35]:
# Run a T-test
stats.ttest_ind(june_temp, dec_temp)

Ttest_indResult(statistic=31.60372399000329, pvalue=3.9025129038616655e-191)