In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

In [3]:
# Location of the monthly climate CSV, relative to the notebook's working directory
climate_path = "CA042239T.csv"

# Load the file into a DataFrame using pandas' default parsing options
climate_results = pd.read_csv(filepath_or_buffer=climate_path)

# Last expression of the cell: display the loaded frame
climate_results

Unnamed: 0,State_id,YEAR,Month,TMAX (F),TMEAN (F),TMIN (F)
0,'042239',1887,1,.,.,.
1,'042239',1887,2,.,.,.
2,'042239',1887,3,.,.,.
3,'042239',1887,4,.,.,.
4,'042239',1887,5,.,.,.
...,...,...,...,...,...,...
1531,'042239',2014,8,83.2,68.3,53.4
1532,'042239',2014,9,82.5,66.7,50.8
1533,'042239',2014,10,75.4,58.6,41.8
1534,'042239',2014,11,62.3,49.1,35.8


In [4]:
# Count missing (NaN/None) entries in each column.
# NOTE(review): the preview of the raw frame shows "." in the temperature columns of the
# earliest rows; those look like string placeholders, which isna() would not count — confirm.
missing_per_column = climate_results.isna().sum()
missing_per_column

State_id      0
YEAR          0
Month         0
TMAX (F)      0
TMEAN (F)     0
TMIN (F)      0
dtype: int64

In [5]:
# Inspect the dtype pandas inferred for each column.
# NOTE(review): a temperature column reported as `object` would mean it holds non-numeric
# entries (the frame preview shows "." in early rows) — confirm before doing arithmetic on it.
climate_results.dtypes

State_id      object
YEAR           int64
Month          int64
TMAX (F)      object
TMEAN (F)     object
TMIN (F)      object
dtype: object

In [6]:
# List the column labels exactly as they were read from the file; the quoted repr makes
# any leading/trailing whitespace in a header name visible.
climate_results.columns

Index(['State_id  ', 'YEAR  ', 'Month  ', 'TMAX (F)', 'TMEAN (F)',
       'TMIN (F) '],
      dtype='object')

In [7]:
# Remove the trailing spaces from the padded header names.
# The mapping is intentionally not named `dict`: binding that name would shadow the
# builtin for the rest of the kernel session and make any later `dict(...)` call fail.
column_renames = {
    'State_id  ': 'State_id',
    'YEAR  ': 'YEAR',
    'Month  ': 'Month',
}

# rename() returns a new frame; `climate_results` keeps its original labels.
# NOTE(review): 'TMIN (F) ' also ends with a space but is not part of this mapping, so it
# keeps its trailing space — confirm whether later cells rely on that exact label.
climate_redux = climate_results.rename(columns=column_renames)
climate_redux.columns

Index(['State_id', 'YEAR', 'Month', 'TMAX (F)', 'TMEAN (F)', 'TMIN (F) '], dtype='object')

In [8]:
# Keep only the rows whose YEAR lies in 1961–1990; between() includes both endpoints
year_in_range = climate_redux['YEAR'].between(1961, 1990)
climate_results_sixtyone = climate_redux[year_in_range]
climate_results_sixtyone

Unnamed: 0,State_id,YEAR,Month,TMAX (F),TMEAN (F),TMIN (F)
888,'042239',1961,1,53.8,40.2,26.7
889,'042239',1961,2,55.7,42.2,28.8
890,'042239',1961,3,53,42.4,31.7
891,'042239',1961,4,64.3,49.7,35
892,'042239',1961,5,64.2,50.8,37.4
...,...,...,...,...,...,...
1243,'042239',1990,8,81.9,67,52
1244,'042239',1990,9,79.6,64.3,48.9
1245,'042239',1990,10,73.7,56.2,38.7
1246,'042239',1990,11,58.9,45.9,32.8


In [9]:
# Alternate method: the same 1961–1990 selection written with .loc and an explicit
# boolean mask (YEAR > 1960 and YEAR < 1991).
# .copy() gives `sixtyone` its own data, so assigning columns to it later is
# unambiguous and does not raise SettingWithCopyWarning.
sixtyone = climate_redux.loc[(climate_redux.YEAR > 1960) & (climate_redux.YEAR < 1991), :].copy()
sixtyone

Unnamed: 0,State_id,YEAR,Month,TMAX (F),TMEAN (F),TMIN (F)
888,'042239',1961,1,53.8,40.2,26.7
889,'042239',1961,2,55.7,42.2,28.8
890,'042239',1961,3,53,42.4,31.7
891,'042239',1961,4,64.3,49.7,35
892,'042239',1961,5,64.2,50.8,37.4
...,...,...,...,...,...,...
1243,'042239',1990,8,81.9,67,52
1244,'042239',1990,9,79.6,64.3,48.9
1245,'042239',1990,10,73.7,56.2,38.7
1246,'042239',1990,11,58.9,45.9,32.8


In [10]:
# Convert the TMAX column to float.
# DataFrame.astype with a {column: dtype} mapping returns a new frame instead of
# assigning into a slice, so no SettingWithCopyWarning is raised and there is no need to
# silence warnings for the whole session. Re-running the cell is safe: converting an
# already-float column is a no-op.
sixtyone = sixtyone.astype({'TMAX (F)': 'float'})
sixtyone.dtypes

State_id      object
YEAR           int64
Month          int64
TMAX (F)     float64
TMEAN (F)     object
TMIN (F)      object
dtype: object