## Step 4 - Getting weather (temperature and precipitation) data and reformatting it with regexes

In [29]:
import pandas as pd

## According to the readme at https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/climate-normals/1981-2010-normals-data, temperatures are recorded down to the tenth and precipitation values down to the hundredth, which is why the raw integers are divided by 10 and 100 respectively further down.

In [30]:
# Load the monthly temperature and precipitation normals from CSV.
# The files have no header row, so header=None labels the columns by position
# (0, 1, 2, ...) and index_col=None keeps a default integer row index.
# pd.read_csv replaces DataFrame.from_csv, which was deprecated in pandas 0.21
# and removed in pandas 1.0.
monthly_temp_normals = pd.read_csv("weather/tavg.csv", index_col=None, header=None)
monthly_precip_normals = pd.read_csv("weather/prcp.csv", index_col=None, header=None)

In [31]:
# Columns 13, 14 and 15 contain only NaN in both tables; remove all three in a
# single in-place drop per frame.
monthly_precip_normals.drop([13, 14, 15], axis=1, inplace=True)
monthly_temp_normals.drop([13, 14, 15], axis=1, inplace=True)

In [32]:
# Preview the first five rows; at this point each monthly value is still a
# string with a trailing flag letter (e.g. "2116R").
monthly_precip_normals.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,AQC00914000,2116R,2022S,1840S,1780R,1820R,1306R,1274R,1383R,1570S,2003S,2036R,2242R
1,AQW00061705,1448C,1266C,1166C,1102C,1062C,586C,647C,630C,763C,1011C,1130C,1452C
2,CAW00064757,160Q,205Q,200Q,286Q,359Q,310Q,366Q,238Q,229Q,209Q,338Q,272Q
3,CQC00914080,384P,442R,240R,503R,380R,504R,1016R,1242R,1165P,1099R,776R,588P
4,CQC00914801,524R,521R,305R,466R,401R,520R,1071R,1312R,1386R,1226R,838R,554R


In [34]:
# Convert the monthly precipitation columns (labels 1-12) from flagged strings
# such as "2116R" into numbers: strip the trailing one-character flag, then
# divide by 100 because the raw values are expressed in hundredths.
for month in range(1, 13):
    # .str[:-1] drops the flag character. It raises on a column that is no longer
    # text, so re-running this cell by accident fails loudly instead of silently
    # chopping a digit off already-converted numbers.
    raw_precip = monthly_precip_normals[month].str[:-1]
    scaled_precip = raw_precip.astype(float) / 100
    # The special value -7777 is stored as 0. The comparison must be against the
    # string "-7777": the stripped values are text, so the earlier comparison
    # with the integer -7777 never matched and the sentinel leaked through as -77.77.
    monthly_precip_normals[month] = scaled_precip.mask(raw_precip == "-7777", 0)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,AQC00914000,21.16,20.22,18.4,17.8,18.2,13.06,12.74,13.83,15.7,20.03,20.36,22.42
1,AQW00061705,14.48,12.66,11.66,11.02,10.62,5.86,6.47,6.3,7.63,10.11,11.3,14.52
2,CAW00064757,1.6,2.05,2,2.86,3.59,3.1,3.66,2.38,2.29,2.09,3.38,2.72
3,CQC00914080,3.84,4.42,2.4,5.03,3.8,5.04,10.16,12.42,11.65,10.99,7.76,5.88
4,CQC00914801,5.24,5.21,3.05,4.66,4.01,5.2,10.71,13.12,13.86,12.26,8.38,5.54
5,CQC00914855,3.56,2.6,1.93,2.32,2.77,4.5,8.27,12.15,10.21,11.32,5.92,4.21
6,CQC00914874,4.11,3.3,2.4,2.71,3.36,5.05,10.43,12.45,12.46,11.2,6.97,4.24
7,FMC00914213,7.26,5.87,6.45,4.87,8.41,12.58,14.97,13.69,12.41,10.39,9.18,9.86
8,FMC00914325,10.21,10.27,11.53,12.12,11.63,13.17,13.16,10.51,9.28,8.1,9.68,8.74
9,FMC00914395,18.14,13.6,15.78,19.52,18.48,15.58,16.03,14.81,15.5,11.52,15.53,18.85


In [35]:
# Convert the monthly temperature columns (labels 1-12) from flagged strings
# into numbers: strip the trailing one-character flag, then divide by 10
# because the raw values are expressed in tenths.
for month in range(1, 13):
    # .str[:-1] drops the flag character. It raises on a column that is no longer
    # text, so re-running this cell by accident fails loudly instead of silently
    # chopping a digit off already-converted numbers.
    raw_temp = monthly_temp_normals[month].str[:-1]
    scaled_temp = raw_temp.astype(float) / 10
    # The special value -7777 is stored as 0. The comparison must be against the
    # string "-7777": the stripped values are text, so the earlier comparison
    # with the integer -7777 never matched and the sentinel leaked through as -777.7.
    monthly_temp_normals[month] = scaled_temp.mask(raw_temp == "-7777", 0)
        

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,AQC00914000,211.6,202.2,184,178,182,130.6,127.4,138.3,157,200.3,203.6,224.2
1,AQW00061705,144.8,126.6,116.6,110.2,106.2,58.6,64.7,63,76.3,101.1,113,145.2
2,CAW00064757,16,20.5,20,28.6,35.9,31,36.6,23.8,22.9,20.9,33.8,27.2
3,CQC00914080,38.4,44.2,24,50.3,38,50.4,101.6,124.2,116.5,109.9,77.6,58.8
4,CQC00914801,52.4,52.1,30.5,46.6,40.1,52,107.1,131.2,138.6,122.6,83.8,55.4
5,CQC00914855,35.6,26,19.3,23.2,27.7,45,82.7,121.5,102.1,113.2,59.2,42.1
6,CQC00914874,41.1,33,24,27.1,33.6,50.5,104.3,124.5,124.6,112,69.7,42.4
7,FMC00914213,72.6,58.7,64.5,48.7,84.1,125.8,149.7,136.9,124.1,103.9,91.8,98.6
8,FMC00914325,102.1,102.7,115.3,121.2,116.3,131.7,131.6,105.1,92.8,81,96.8,87.4
9,FMC00914395,181.4,136,157.8,195.2,184.8,155.8,160.3,148.1,155,115.2,155.3,188.5


In [36]:
# Write both converted tables to CSV in the current working directory
# (the input files were read from the weather/ folder, so those are not overwritten).
for converted_frame, output_name in (
    (monthly_precip_normals, "prcp.csv"),
    (monthly_temp_normals, "tavg.csv"),
):
    converted_frame.to_csv(output_name)