# Precipitation Data of Credit River

The goal of this notebook is to collect the precipitation data of Credit River and analyze it. The source data is available on https://lamps.math.yorku.ca/OntarioClimate/HistoricalObservations.htm#/.

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('Ontario_Data/Precipitation/daily_total_precipitation_2010.csv').drop(['ID'], axis='columns')
df.head(3)

Unnamed: 0,Longitude,Latitude,01-01,01-02,01-03,01-04,01-05,01-06,01-07,01-08,...,12-22,12-23,12-24,12-25,12-26,12-27,12-28,12-29,12-30,12-31
0,-89.0,56.875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-88.875,56.875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-89.25,56.75,0.1,0.0,0.0,0.1,0.2,0.5,0.0,0.0,...,1.6,0.3,0.9,0.5,0.0,0.1,0.4,1.5,0.1,0.1


In [3]:
coordeninates = df[['Longitude', 'Latitude']].values
coordeninates

array([[-89.   ,  56.875],
       [-88.875,  56.875],
       [-89.25 ,  56.75 ],
       ...,
       [-82.375,  41.75 ],
       [-82.25 ,  41.75 ],
       [-82.125,  41.75 ]])

In [62]:
hydrometric_locations = {'02HB013': [(43, 53, 28), (80,3,44)],
                         '02HB001': [(43, 50, 9), (80,1,22)],
                         '02HB031': [(43, 47, 24), (80, 8, 36)],
                         '02HB018': [(43, 46, 24), (79, 55, 36)],
                         '02HB008': [(43, 38, 47), (79, 51, 58)],
                         '02HB025': [(43, 38, 51), (79, 51, 21)],
                         '02HB029': [(43, 34, 56), (79, 42, 31)]}

In [63]:
def dms_to_decimal(degrees, minutes, seconds, direction):
    decimal_degrees = degrees + (minutes / 60) + (seconds / 3600)
    if direction in ['S', 'W']:
        decimal_degrees *= -1
    return decimal_degrees

In [64]:
for location in hydrometric_locations:
    coord1 = hydrometric_locations[location][0]
    coord2 = hydrometric_locations[location][1]
    decimal = [dms_to_decimal(coord2[0], coord2[1], coord2[2], 'W'), dms_to_decimal(coord1[0], coord1[1], coord1[2], 'N')]
    hydrometric_locations[location] = decimal

In [65]:
hydrometric_locations

{'02HB013': [-80.06222222222222, 43.89111111111111],
 '02HB001': [-80.02277777777778, 43.83583333333333],
 '02HB031': [-80.14333333333335, 43.79],
 '02HB018': [-79.92666666666668, 43.77333333333333],
 '02HB008': [-79.86611111111111, 43.646388888888886],
 '02HB025': [-79.85583333333332, 43.6475],
 '02HB029': [-79.70861111111111, 43.58222222222223]}

In [66]:
def calc_distance(coord1, coord2):
    return coord1 - coord2

In [113]:
from math import sqrt
data = []
precipitation_locations = []
for location in hydrometric_locations:
    min = 100
    min_coord = None
    for coord in coordeninates:
        diff = abs(calc_distance(coord, hydrometric_locations[location]))
        distance = (sqrt(diff[0]**2 + diff[1]**2))
        if(distance < min):
            min = distance
            min_coord = coord
    precipitation_locations.append((min_coord[0], min_coord[1]))
    data.append([location, hydrometric_locations[location], min_coord])

In [141]:
df['Coord'] = list(zip(df['Longitude'], df['Latitude']))
precipitation_stations = df[(df.Coord.isin(precipitation_locations))]

In [175]:
from datetime import datetime
def cvt_datetime(date, year):
    return datetime(year, int(date.split('-')[0]), int(date.split('-')[1]))

In [163]:
df_precipitation = precipitation_stations.set_index('Coord').drop(['Longitude', 'Latitude'], axis='columns').transpose().reset_index()
df_precipitation['index'] = df_precipitation['index'].apply(cvt_datetime, args=(2010,))
df_precipitation.head(3)

Coord,index,"(-80.0, 43.875)","(-80.125, 43.75)","(-79.875, 43.75)","(-79.875, 43.625)","(-79.75, 43.625)"
0,2010-01-01,2.3,2.0,2.0,1.5,1.5
1,2010-01-02,0.8,0.7,0.5,0.3,0.3
2,2010-01-03,2.2,1.7,1.9,1.5,1.8


In [164]:
df_final = pd.DataFrame(columns=['index', (-80.0, 43.875), (-80.125, 43.75), (-79.875, 43.75), (-79.875, 43.625), (-79.75, 43.625)])
df_final = pd.concat([df_final, df_precipitation], ignore_index=False)
df_final.head(3)

  df_final = pd.concat([df_final, df_precipitation], ignore_index=False)


Unnamed: 0,index,"(-80.0, 43.875)","(-80.125, 43.75)","(-79.875, 43.75)","(-79.875, 43.625)","(-79.75, 43.625)"
0,2010-01-01,2.3,2.0,2.0,1.5,1.5
1,2010-01-02,0.8,0.7,0.5,0.3,0.3
2,2010-01-03,2.2,1.7,1.9,1.5,1.8


In [182]:
import os
df_final = pd.DataFrame(columns=['index', (-80.0, 43.875), (-80.125, 43.75), (-79.875, 43.75), (-79.875, 43.625), (-79.75, 43.625)])
dirpath = 'Ontario_Data/Precipitation'
for file in os.listdir(dirpath):
    print(f'FILE: {file}')
    year = int(file[26:30])
    filepath = os.path.join(dirpath, file)
    df = pd.read_csv(filepath, sep=',').drop(['ID'], axis='columns')

    # Create Coord column
    df['Coord'] = list(zip(df['Longitude'], df['Latitude']))
    # Get the found precipitation stations
    precipitation_stations = df[(df.Coord.isin(precipitation_locations))]
    # Transpose the df
    df_precipitation = precipitation_stations.set_index('Coord').drop(['Longitude', 'Latitude'], axis='columns').transpose().reset_index()
    df_precipitation['index'] = df_precipitation['index'].apply(cvt_datetime, args=(year,))
    # Concat to df_final
    df_final = pd.concat([df_final, df_precipitation], ignore_index=False)
df_final

FILE: daily_total_precipitation_2010.csv


  df_final = pd.concat([df_final, df_precipitation], ignore_index=False)


FILE: daily_total_precipitation_2011.csv
FILE: daily_total_precipitation_2012.csv
FILE: daily_total_precipitation_2013.csv
FILE: daily_total_precipitation_2014.csv
FILE: daily_total_precipitation_2015.csv
FILE: daily_total_precipitation_2016.csv
FILE: daily_total_precipitation_2017.csv
FILE: daily_total_precipitation_2018.csv


Unnamed: 0,index,"(-80.0, 43.875)","(-80.125, 43.75)","(-79.875, 43.75)","(-79.875, 43.625)","(-79.75, 43.625)"
0,2010-01-01,2.3,2.0,2.0,1.5,1.5
1,2010-01-02,0.8,0.7,0.5,0.3,0.3
2,2010-01-03,2.2,1.7,1.9,1.5,1.8
3,2010-01-04,0.7,0.6,0.6,0.6,0.7
4,2010-01-05,1.7,1.7,1.8,2.0,2.1
...,...,...,...,...,...,...
360,2018-12-27,0.3,0.3,0.2,0.2,0.1
361,2018-12-28,5.0,5.8,5.2,6.0,6.8
362,2018-12-29,0.7,0.7,0.4,0.3,0.2
363,2018-12-30,0.9,1.0,0.8,0.8,0.8


In [185]:
df_final = df_final.rename(columns={'index': 'DATE'})
df_final

Unnamed: 0,DATE,"(-80.0, 43.875)","(-80.125, 43.75)","(-79.875, 43.75)","(-79.875, 43.625)","(-79.75, 43.625)"
0,2010-01-01,2.3,2.0,2.0,1.5,1.5
1,2010-01-02,0.8,0.7,0.5,0.3,0.3
2,2010-01-03,2.2,1.7,1.9,1.5,1.8
3,2010-01-04,0.7,0.6,0.6,0.6,0.7
4,2010-01-05,1.7,1.7,1.8,2.0,2.1
...,...,...,...,...,...,...
360,2018-12-27,0.3,0.3,0.2,0.2,0.1
361,2018-12-28,5.0,5.8,5.2,6.0,6.8
362,2018-12-29,0.7,0.7,0.4,0.3,0.2
363,2018-12-30,0.9,1.0,0.8,0.8,0.8


In [186]:
df_final.to_csv('precipitation_daily.csv', sep=',', index=False)