# Bonus: Temperature Analysis I

In [1]:
import pandas as pd
from datetime import datetime as dt
from scipy import stats

In [2]:
# "tobs" is "temperature observations"
temp_analysis_df = pd.read_csv(filepath_or_buffer="Resources/hawaii_measurements.csv")
# Preview the first five rows of the loaded measurements
temp_analysis_df.head(5)

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [3]:
# Convert the date column from string to datetime.
# The CSV stores dates dash-separated (e.g. "2010-01-01"), so the format string
# must use dashes too; a slash-based format does not match the data and can
# raise a ValueError on pandas versions that enforce the format strictly.
temp_analysis_df["date"] = pd.to_datetime(temp_analysis_df["date"], format="%Y-%m-%d")
temp_analysis_df.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [4]:
# Use the dates as the row index; drop=False keeps the "date" column in the
# frame as well, so it is still present for the following cell.
temp_analysis_df = temp_analysis_df.set_index("date", drop=False)
temp_analysis_df

Unnamed: 0_level_0,station,date,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01,USC00519397,2010-01-01,0.08,65
2010-01-02,USC00519397,2010-01-02,0.00,63
2010-01-03,USC00519397,2010-01-03,0.00,74
2010-01-04,USC00519397,2010-01-04,0.00,76
2010-01-06,USC00519397,2010-01-06,,73
...,...,...,...,...
2017-08-19,USC00516128,2017-08-19,0.09,71
2017-08-20,USC00516128,2017-08-20,,78
2017-08-21,USC00516128,2017-08-21,0.56,76
2017-08-22,USC00516128,2017-08-22,0.50,76


In [5]:
# The dates now live in the index, so remove the redundant "date" column
temp_analysis_df = temp_analysis_df.drop("date", axis="columns")
temp_analysis_df

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.00,63
2010-01-03,USC00519397,0.00,74
2010-01-04,USC00519397,0.00,76
2010-01-06,USC00519397,,73
...,...,...,...
2017-08-19,USC00516128,0.09,71
2017-08-20,USC00516128,,78
2017-08-21,USC00516128,0.56,76
2017-08-22,USC00516128,0.50,76


### Compare June and December data across all years 

In [6]:
from scipy import stats

In [7]:
# Keep only the observations whose index date falls in June (month 6) of any
# year, then preview the first ten rows
temp_analysis_june_df = temp_analysis_df.loc[temp_analysis_df.index.month == 6]
temp_analysis_june_df.head(n=10)

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-06-01,USC00519397,0.0,78
2010-06-02,USC00519397,0.01,76
2010-06-03,USC00519397,0.0,78
2010-06-04,USC00519397,0.0,76
2010-06-05,USC00519397,0.0,77
2010-06-06,USC00519397,0.0,78
2010-06-07,USC00519397,0.0,77
2010-06-08,USC00519397,0.0,78
2010-06-09,USC00519397,0.0,78
2010-06-10,USC00519397,0.0,79


In [8]:
# Keep only the observations whose index date falls in December (month 12) of
# any year, then preview the first ten rows
temp_analysis_dec_df = temp_analysis_df.loc[temp_analysis_df.index.month == 12]
temp_analysis_dec_df.head(n=10)

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-12-01,USC00519397,0.04,76
2010-12-03,USC00519397,0.0,74
2010-12-04,USC00519397,0.0,74
2010-12-06,USC00519397,0.0,64
2010-12-07,USC00519397,0.0,64
2010-12-08,USC00519397,0.0,67
2010-12-09,USC00519397,0.0,77
2010-12-10,USC00519397,1.58,66
2010-12-11,USC00519397,0.77,69
2010-12-12,USC00519397,0.0,68


In [9]:
# Summary statistics for June, requesting the 20th/40th/60th/80th percentiles
perc = [0.2, 0.4, 0.6, 0.8]
temp_analysis_june_df.describe(percentiles=perc)

Unnamed: 0,prcp,tobs
count,1574.0,1700.0
mean,0.13636,74.944118
std,0.335731,3.257417
min,0.0,64.0
20%,0.0,72.0
40%,0.0,74.0
50%,0.02,75.0
60%,0.04,76.0
80%,0.17,78.0
max,4.43,85.0


In [10]:
# Summary statistics for December, requesting the 20th/40th/60th/80th percentiles
perc = [0.2, 0.4, 0.6, 0.8]
temp_analysis_dec_df.describe(percentiles=perc)

Unnamed: 0,prcp,tobs
count,1405.0,1517.0
mean,0.216819,71.041529
std,0.541399,3.74592
min,0.0,56.0
20%,0.0,68.0
40%,0.01,70.0
50%,0.03,71.0
60%,0.05,72.0
80%,0.22,74.0
max,6.42,83.0


In [11]:
# Pull out the June temperature observations ("tobs") as a Series
june_temp = temp_analysis_june_df["tobs"]
june_temp


date
2010-06-01    78
2010-06-02    76
2010-06-03    78
2010-06-04    76
2010-06-05    77
              ..
2017-06-26    79
2017-06-27    74
2017-06-28    74
2017-06-29    76
2017-06-30    75
Name: tobs, Length: 1700, dtype: int64

In [12]:
# Pull out the December temperature observations ("tobs") as a Series
dec_temp = temp_analysis_dec_df["tobs"]
dec_temp

date
2010-12-01    76
2010-12-03    74
2010-12-04    74
2010-12-06    64
2010-12-07    64
              ..
2016-12-27    71
2016-12-28    71
2016-12-29    69
2016-12-30    65
2016-12-31    65
Name: tobs, Length: 1517, dtype: int64

In [None]:
# Run an independent (unpaired) two-sample t-test on June vs. December temperatures.
# NOTE: stats.ttest_ind is the unpaired test, not a paired one. A paired test
# (stats.ttest_rel) needs equal-length, matched samples, whereas june_temp has
# 1700 observations and dec_temp has 1517.
stats.ttest_ind(june_temp,dec_temp)

### Analysis

In [None]:
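The December mean temperature is 71.041529.
The June mean temperature is 74.944118. June is only about 3.9 degrees warmer on average, so there is not much difference between the June and December means in absolute terms. Whether that gap is statistically significant should be judged from the p-value of the t-test above.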