In [1]:
import numpy as np
import pandas as pd

# Import Data 

## Russia_518_data.txt 

In [2]:
# Load the raw station records (whitespace-delimited text, no header row).
# sep=r'\s+' is the supported replacement for delim_whitespace=True, which
# pandas has deprecated; both split fields on runs of whitespace.
df = pd.read_csv('../Data/Raw/Russia_518_data.txt', sep=r'\s+', header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,20046,1958,1,1,0,-35.8,0,-34.0,0,-31.7,0,0.0,2,0
1,20046,1958,1,2,9,-99.9,9,-99.9,9,-99.9,9,-99.9,9,9
2,20046,1958,1,3,9,-99.9,9,-99.9,9,-99.9,9,-99.9,9,9
3,20046,1958,1,4,0,-40.0,0,-37.6,0,-33.0,0,0.0,2,0
4,20046,1958,1,5,0,-36.9,0,-34.8,0,-31.2,0,0.6,0,0


In [3]:
# Give the columns used later descriptive titles. Columns that are not listed
# here (4, 6, 8, 10, 12 and 13) keep their integer labels.
df = df.rename(
    columns={
        0: 'Station_ID',
        1: 'Year',
        2: 'Month',
        3: 'Day',
        5: 'Min Temp',
        7: 'Mean Temp',
        9: 'Max Temp',
        11: 'Percipitation',
    }
)
df.head()

Unnamed: 0,Station_ID,Year,Month,Day,4,Min Temp,6,Mean Temp,8,Max Temp,10,Percipitation,12,13
0,20046,1958,1,1,0,-35.8,0,-34.0,0,-31.7,0,0.0,2,0
1,20046,1958,1,2,9,-99.9,9,-99.9,9,-99.9,9,-99.9,9,9
2,20046,1958,1,3,9,-99.9,9,-99.9,9,-99.9,9,-99.9,9,9
3,20046,1958,1,4,0,-40.0,0,-37.6,0,-33.0,0,0.0,2,0
4,20046,1958,1,5,0,-36.9,0,-34.8,0,-31.2,0,0.6,0,0


In [4]:
# Keep only the station identifier, the date parts and the precipitation
# reading; the temperature and unnamed columns are not used below.
df_sub = df.loc[:, ['Station_ID', 'Year', 'Month', 'Day', 'Percipitation']]

In [13]:
# Summary statistics for every column of the subset.
# NOTE: no rows are filtered out here, so the -99.9 readings that
# extract_time_mean_values later removes as N/A are still included in the
# Percipitation statistics (hence a minimum of -99.9 and a negative mean).
df_sub.describe()

Unnamed: 0,Station_ID,Year,Month,Day,Percipitation
count,14228252.0,14228252.0,14228252.0,14228252.0,14228252.0
mean,28609.979598,1968.973939,6.522891,15.729413,-4.008856
std,4367.063535,27.935297,3.448712,8.800061,22.98683
min,20046.0,1874.0,1.0,1.0,-99.9
25%,24790.0,1951.0,4.0,8.0,0.0
50%,28895.0,1972.0,7.0,16.0,0.0
75%,31538.0,1991.0,10.0,23.0,0.8
max,37663.0,2011.0,12.0,31.0,308.5


## Russia_518_inventory.txt

In [5]:
# Load the station inventory (whitespace-delimited text, no header row).
# sep=r'\s+' is the supported replacement for delim_whitespace=True, which
# pandas has deprecated; both split fields on runs of whitespace.
df_inv = pd.read_csv('../Data/Raw/Russia_518_inventory.txt', sep=r'\s+', header=None)
df_inv.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,20046,80.62,58.05,21,1957,Polar,GMO,im.E.T.Krenkelja
1,20069,79.5,76.98,10,1945,Vize,,
2,20087,79.55,90.62,7,1940,Golomjannyj,,
3,20107,78.07,14.25,22,1940,Barencburg,,
4,20289,77.17,96.43,9,1940,Russkij,,


In [6]:
# Give the first five inventory columns descriptive titles. Columns 5, 6 and
# 7 (the whitespace-split pieces of the station name) keep their integer labels.
df_inv = df_inv.rename(
    columns={
        0: 'Station_ID',
        1: 'Latitude',
        2: 'Longitude',
        3: 'Elevation',
        4: 'Initiation_Year',
    }
)
df_inv.head()

Unnamed: 0,Station_ID,Latitude,Longitude,Elevation,Initiation_Year,5,6,7
0,20046,80.62,58.05,21,1957,Polar,GMO,im.E.T.Krenkelja
1,20069,79.5,76.98,10,1945,Vize,,
2,20087,79.55,90.62,7,1940,Golomjannyj,,
3,20107,78.07,14.25,22,1940,Barencburg,,
4,20289,77.17,96.43,9,1940,Russkij,,


# For: Dr. Ye
## List of stations that have Longitude > 180


In [7]:
# Inventory rows whose longitude exceeds 180; add_geographical_location
# drops these stations before the map is built.
df_inv.loc[df_inv['Longitude'] > 180]

Unnamed: 0,Station_ID,Latitude,Longitude,Elevation,Initiation_Year,5,6,7
22,21982,70.98,181.52,2,1926,Ostrov,Vrangelja,
146,25173,68.9,180.63,2,1932,Mys,Shmidta,
149,25282,67.83,184.17,3,1961,Mys,Vankarem,
152,25372,67.02,181.07,139,1955,Amguema,87km,
153,25378,66.35,180.88,21,1961,Egvekinot,,
154,25399,66.17,190.17,5,1918,Mys,Uelen,
161,25594,64.42,186.77,25,1934,Buhta,Providenja,


# Time-Mean Precipitation Maps

## Functions

In [None]:
def extract_time_mean_values(start_date, end_date, df_sub):
    """Compute each station's mean precipitation between two dates.

    Parameters
    ----------
    start_date, end_date : sequence of int
        ``[year, month, day]`` bounds of the averaging window. Both ends are
        inclusive and every day in between is used.
    df_sub : pandas.DataFrame
        Records with the columns ``Station_ID``, ``Year``, ``Month``, ``Day``
        and ``Percipitation``. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per station that has at least one non-missing reading inside
        the window, with the columns ``Station_ID`` and
        ``Mean_Percipitation``. Stations are listed in order of first
        appearance in ``df_sub``.
    """
    # Work on the frame that was passed in (not on a notebook-level global),
    # restricted to the columns this function needs.
    records = df_sub[['Station_ID', 'Year', 'Month', 'Day', 'Percipitation']]

    # Collapse (year, month, day) into one sortable integer such as 20030117,
    # so the whole window -- including the months between the start month and
    # the end month -- is selected with a plain range comparison.
    start_key = start_date[0] * 10000 + start_date[1] * 100 + start_date[2]
    end_key = end_date[0] * 10000 + end_date[1] * 100 + end_date[2]
    date_key = (records.Year.astype('int64') * 10000
                + records.Month.astype('int64') * 100
                + records.Day.astype('int64'))
    in_window = (date_key >= start_key) & (date_key <= end_key)

    # -99.9 is the marker used for a missing precipitation reading.
    has_reading = records.Percipitation != -99.9

    selected = records[in_window & has_reading]

    # One grouped pass instead of re-scanning the frame once per station;
    # sort=False keeps the stations in order of first appearance.
    station_means = selected.groupby('Station_ID', sort=False).Percipitation.mean()

    return pd.DataFrame({
        'Station_ID': station_means.index.values,
        'Mean_Percipitation': station_means.values,
    })

In [None]:
def add_geographical_location(df_data, df_inv):
    """Attach half-degree grid coordinates to the per-station means.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Per-station means with the columns ``Station_ID`` and
        ``Mean_Percipitation``.
    df_inv : pandas.DataFrame
        Station inventory with at least the columns ``Station_ID``,
        ``Latitude`` and ``Longitude`` (in degrees).

    Returns
    -------
    pandas.DataFrame
        Columns ``Station_ID``, ``Latitude``, ``Longitude`` and
        ``Mean_Percipitation`` (in that order) for the stations present in
        both inputs whose longitude is at most 180. ``Latitude`` and
        ``Longitude`` are expressed in half-degree units (degrees * 2,
        rounded to the nearest whole number). Neither input is modified.
    """
    # Inner join: stations without an inventory entry are dropped.
    merged = pd.merge(df_data, df_inv, on='Station_ID', how='inner')

    # Select rows (longitude <= 180) and columns in a single .loc call so the
    # result is an independent frame rather than a slice that later column
    # assignments would flag with SettingWithCopyWarning.
    located = merged.loc[
        merged.Longitude <= 180,
        ['Station_ID', 'Latitude', 'Longitude', 'Mean_Percipitation'],
    ]

    # Convert degrees to half-degree units and round to the nearest cell.
    # assign() returns a new frame and keeps the column order.
    return located.assign(
        Longitude=(located.Longitude * 2).round(decimals=0),
        Latitude=(located.Latitude * 2).round(decimals=0),
    )

In [8]:
def create_percipitation_map(df_data_merge):
    """Place per-station means on a 180 x 360 grid of half-degree cells.

    Parameters
    ----------
    df_data_merge : pandas.DataFrame
        Frame whose columns are read by position: column 1 is the latitude,
        column 2 the longitude (both already in half-degree units) and
        column 3 the mean precipitation. This matches the column order
        produced by ``add_geographical_location``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(180, 360)``, zero where no station falls. The row
        index is ``180 - latitude`` and the column index is ``longitude``.
        When several stations share a cell, the one that comes last in the
        frame is kept.

    Notes
    -----
    No bounds handling is done: a row or column index outside the grid
    (for example latitude 0 -> row 180, or longitude 360 -> column 360)
    raises ``IndexError``, and negative indices wrap around as usual for
    numpy indexing.
    """
    grid = np.zeros((180, 360))

    # Iterate over the frame as plain numpy rows.
    for record in df_data_merge.values:
        row = 180 - record[1].astype(int)
        col = record[2].astype(int)
        grid[row, col] = record[3]

    return grid

In [53]:
def create_percipitation_csv(start_date, end_date, df_sub):
    """Build the time-mean precipitation grid for a window and save it as CSV.

    Runs ``extract_time_mean_values``, ``add_geographical_location`` and
    ``create_percipitation_map`` in sequence and writes the resulting grid
    with ``np.savetxt``.

    Parameters
    ----------
    start_date, end_date : sequence of int
        ``[year, month, day]`` bounds of the averaging window.
    df_sub : pandas.DataFrame
        Precipitation records, forwarded to ``extract_time_mean_values``.

    Returns
    -------
    None
        The only result is the file written under ``../Data/Processed/``.

    Notes
    -----
    The station inventory is read from the notebook-level ``df_inv``; it is
    not a parameter. The file name joins year, month and day without
    zero-padding (e.g. ``data_2003117_to_2003317.csv``).
    """
    station_means = extract_time_mean_values(start_date, end_date, df_sub)
    located_means = add_geographical_location(station_means, df_inv)
    grid = create_percipitation_map(located_means)

    # Date tags are the three date parts concatenated as-is.
    start_tag = ''.join(str(start_date[k]) for k in range(3))
    end_tag = ''.join(str(end_date[k]) for k in range(3))
    filename = '../Data/Processed/data_' + start_tag + '_to_' + end_tag + '.csv'

    # export data to CSV file
    np.savetxt(filename, grid, delimiter=',')

## Create average maps from: 2003 to 2011

In [54]:
# One (start, end) averaging window per year, each given as [year, month, day].
# NOTE(review): 2004 starts on Jan 8 and 2011 on Feb 19 while every other year
# starts on Jan 17 -- confirm these two are intentional.
averaging_windows = [
    ([2003, 1, 17], [2003, 3, 17]),
    ([2004, 1, 8], [2004, 3, 17]),
    ([2005, 1, 17], [2005, 3, 17]),
    ([2006, 1, 17], [2006, 3, 17]),
    ([2007, 1, 17], [2007, 3, 17]),
    ([2008, 1, 17], [2008, 3, 17]),
    ([2009, 1, 17], [2009, 3, 17]),
    ([2010, 1, 17], [2010, 3, 17]),
    ([2011, 2, 19], [2011, 3, 17]),
]

# create_percipitation_csv writes one CSV per window and has no return
# statement, so df_data ends up as None.
for window_start, window_end in averaging_windows:
    df_data = create_percipitation_csv(window_start, window_end, df_sub)