# General Operations on weather data and merging with MRT

import cdsapi

# Download the ERA5 single-level reanalysis subset used by the rest of this notebook.
c = cdsapi.Client()

# Request body: four surface variables, May-September of 2010-2013,
# four timestamps per day, delivered as NetCDF.
era5_request = {
    'product_type': 'reanalysis',
    'variable': [
        '10m_u_component_of_wind',
        '2m_dewpoint_temperature',
        '2m_temperature',
        'downward_uv_radiation_at_the_surface',
    ],
    'year': ['2010', '2011', '2012', '2013'],
    'month': ['05', '06', '07', '08', '09'],
    'day': [
        '01', '02', '03', '04', '05', '06', '07', '08',
        '09', '10', '11', '12', '13', '14', '15', '16',
        '17', '18', '19', '20', '21', '22', '23', '24',
        '25', '26', '27', '28', '29', '30', '31',
    ],
    'time': ['02:00', '11:00', '16:00', '23:00'],
    # NOTE(review): presumably [north, west, south, east] in degrees, per the
    # CDS API convention - confirm against the CDS documentation.
    'area': [71.2, -10, 37, 30],
    'format': 'netcdf',
}

# The result is written to 'download10_13.nc' in the current working directory.
c.retrieve('reanalysis-era5-single-levels', era5_request, 'download10_13.nc')


In [1]:
import xarray as xr
import numpy as np
import pandas as pd



In [2]:
# Open the downloaded ERA5 NetCDF file and flatten it into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, while the download
# cell writes 'download10_13.nc' to the working directory - confirm the file
# was moved here before running.
era5_path = 'C:/Users/benhu/MasterThesisRawData/download10_13.nc'
ds = xr.open_dataset(era5_path)
df_13 = ds.to_dataframe()

ecCodes library not found using ['eccodes', 'libeccodes.so', 'libeccodes']


In [3]:
# move the coordinate and time index levels into ordinary columns
index_levels = ['longitude', 'latitude', 'time']
df_13 = df_13.reset_index(level=index_levels)

In [4]:
# display the flattened frame (longitude, latitude and time are now columns)
df_13

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb
0,-10.0,71.0,2010-05-01 02:00:00,1.747800,264.579041,269.654572,0.015625
1,-10.0,71.0,2010-05-01 11:00:00,-0.100240,264.870331,269.875000,149345.843750
2,-10.0,71.0,2010-05-01 16:00:00,1.554787,265.301697,269.336090,126993.281250
3,-10.0,71.0,2010-05-01 23:00:00,1.232360,264.432129,269.217194,41.093750
4,-10.0,71.0,2010-05-02 02:00:00,1.165064,263.917908,269.313507,0.015625
...,...,...,...,...,...,...,...
53995531,30.0,37.0,2013-09-29 23:00:00,0.719878,278.056549,283.910004,0.015625
53995532,30.0,37.0,2013-09-30 02:00:00,0.182253,278.257263,282.557098,0.015625
53995533,30.0,37.0,2013-09-30 11:00:00,0.723576,277.514984,296.677155,274784.593750
53995534,30.0,37.0,2013-09-30 16:00:00,0.060234,277.608093,295.002289,10703.765625


In [5]:
# inspect column dtypes and memory usage of the full frame
df_13.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53995536 entries, 0 to 53995535
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  float64       
 1   latitude   float64       
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), float64(2)
memory usage: 2.0 GB


In [6]:
#df['appTemp'] = df.apply(lambda row: -2.653+(0.994*df['t2m'])+(0.368*df['d2m']^2), axis=1)
#df.apply(lambda row: row.a + row.b, axis=1)

In [7]:
# Coordinates to keep: every 0.75 degrees, starting at -10 for longitude and
# 37 for latitude. np.arange excludes the stop value, so 31 and 72 themselves
# are never produced.
grid_step = 0.75
lon_values = np.arange(-10, 31, grid_step)
lat_values = np.arange(37, 72, grid_step)
lon = list(lon_values)
lat = list(lat_values)

In [8]:
#lon = list(range(-10, 31))
#lat = list(range(37, 72))

In [9]:
# Keep only the rows whose longitude AND latitude are in the 0.75-degree grid
# lists built above.
# .copy() makes the result an independent frame: the following cells add and
# overwrite columns on df_13, and without the copy each of those assignments
# raises a SettingWithCopyWarning because df_13 would still be a slice of the
# original frame.
keep_rows = df_13['longitude'].isin(lon) & df_13['latitude'].isin(lat)
df_13 = df_13[keep_rows].copy()

In [10]:
# number of rows and columns left after the coordinate subset
df_13.shape

(6080832, 7)

In [11]:
# convert the coordinates to strings so they can be combined into a tuple in a later step
for coord_col in ('latitude', 'longitude'):
    df_13[coord_col] = df_13[coord_col].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['latitude'] = df_13['latitude'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['longitude'] = df_13['longitude'].astype(str)


In [12]:
# confirm that longitude and latitude are now stored as object (string) columns
df_13.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6080832 entries, 2448 to 53660159
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  object        
 1   latitude   object        
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), object(2)
memory usage: 278.4+ MB


In [13]:
# Create one identifier per location by pairing latitude and longitude into a
# (latitude, longitude) tuple.
# zip() builds the same tuples as a row-wise .apply(tuple, axis=1) but avoids
# constructing a Series for each of the millions of rows.
df_13['lat_long'] = list(zip(df_13['latitude'], df_13['longitude']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['lat_long'] = df_13[['latitude', 'longitude']].apply(tuple, axis=1)


In [14]:
# preview the first rows, including the new lat_long column
df_13.head()

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long
2448,-10.0,70.75,2010-05-01 02:00:00,1.804003,264.905365,269.832458,0.015625,"(70.75, -10.0)"
2449,-10.0,70.75,2010-05-01 11:00:00,-0.762843,265.167603,270.009491,137731.28125,"(70.75, -10.0)"
2450,-10.0,70.75,2010-05-01 16:00:00,0.654801,265.55368,269.941803,135464.53125,"(70.75, -10.0)"
2451,-10.0,70.75,2010-05-01 23:00:00,1.252327,264.766113,269.361237,6.859375,"(70.75, -10.0)"
2452,-10.0,70.75,2010-05-02 02:00:00,1.017902,264.198944,269.483612,0.015625,"(70.75, -10.0)"


In [15]:
# split the timestamp into two string columns: calendar date and clock time
time_accessor = df_13['time'].dt
df_13['date'] = time_accessor.strftime('%Y-%m-%d')
df_13['clock'] = time_accessor.strftime('%H:%M:%S')
#subdf = subdf.drop(['time'], axis=1)
#subdf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['date'] = df_13['time'].dt.strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['clock'] = df_13['time'].dt.strftime('%H:%M:%S')


In [16]:
# convert 2m temperature and 2m dew point from Kelvin to degrees Celsius
for kelvin_col in ('t2m', 'd2m'):
    df_13[kelvin_col] = df_13[kelvin_col] - 273.15

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['t2m'] = df_13['t2m']-273.15
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['d2m'] = df_13['d2m']-273.15


In [17]:
# apparent temperature from air temperature (t2m) and dew point (d2m),
# both already converted to degrees Celsius in the previous cell
air_temp = df_13['t2m']
dew_point = df_13['d2m']
df_13['apparent_temperature'] = -2.653 + (0.994 * air_temp) + (0.0153 * dew_point ** 2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_13['apparent_temperature'] = -2.653+(0.994*df_13['t2m'])+(0.0153*df_13['d2m']**2)


In [18]:
# display the frame with the derived date, clock and apparent_temperature columns
df_13

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long,date,clock,apparent_temperature
2448,-10.0,70.75,2010-05-01 02:00:00,1.804003,-8.244635,-3.317542,0.015625,"(70.75, -10.0)",2010-05-01,02:00:00,-4.910634
2449,-10.0,70.75,2010-05-01 11:00:00,-0.762843,-7.982397,-3.140509,137731.281250,"(70.75, -10.0)",2010-05-01,11:00:00,-4.799770
2450,-10.0,70.75,2010-05-01 16:00:00,0.654801,-7.596320,-3.208197,135464.531250,"(70.75, -10.0)",2010-05-01,16:00:00,-4.959076
2451,-10.0,70.75,2010-05-01 23:00:00,1.252327,-8.383887,-3.788763,6.859375,"(70.75, -10.0)",2010-05-01,23:00:00,-5.343601
2452,-10.0,70.75,2010-05-02 02:00:00,1.017902,-8.951056,-3.666388,0.015625,"(70.75, -10.0)",2010-05-02,02:00:00,-5.071532
...,...,...,...,...,...,...,...,...,...,...,...
53660155,29.75,37.0,2013-09-29 23:00:00,-0.546166,9.422662,11.818719,0.015625,"(37.0, 29.75)",2013-09-29,23:00:00,10.453242
53660156,29.75,37.0,2013-09-30 02:00:00,-0.469996,7.437524,11.221674,0.015625,"(37.0, 29.75)",2013-09-30,02:00:00,9.347690
53660157,29.75,37.0,2013-09-30 11:00:00,2.489530,4.378656,23.800500,283748.906250,"(37.0, 29.75)",2013-09-30,11:00:00,21.298039
53660158,29.75,37.0,2013-09-30 16:00:00,1.040826,8.561639,21.171045,11265.328125,"(37.0, 29.75)",2013-09-30,16:00:00,19.512534


In [19]:
# one subset per downloaded timestamp
# (the names sub16am / sub23am hold the 16:00 and 23:00 rows)
clock_col = df_13['clock']
sub2am = df_13[clock_col.eq('02:00:00')]
sub11am = df_13[clock_col.eq('11:00:00')]
sub16am = df_13[clock_col.eq('16:00:00')]
sub23am = df_13[clock_col.eq('23:00:00')]

In [20]:
# rename the measurement columns so each one carries its hour as a suffix
sub2am = sub2am.rename(columns={
    't2m': 'temperature_2AM',
    'd2m': 'dew_point_2AM',
    'uvb': 'uvb_2AM',
    'u10': 'wind_2AM',
    'apparent_temperature': 'apparent_temperature_2AM',
})
sub11am = sub11am.rename(columns={
    't2m': 'temperature_11AM',
    'd2m': 'dew_point_11AM',
    'uvb': 'uvb_11AM',
    'u10': 'wind_11AM',
    'apparent_temperature': 'apparent_temperature_11AM',
})
sub16am = sub16am.rename(columns={
    't2m': 'temperature_4PM',
    'd2m': 'dew_point_4PM',
    'uvb': 'uvb_4PM',
    'u10': 'wind_4PM',
    'apparent_temperature': 'apparent_temperature_4PM',
})
sub23am = sub23am.rename(columns={
    't2m': 'temperature_11PM',
    'd2m': 'dew_point_11PM',
    'uvb': 'uvb_11PM',
    'u10': 'wind_11PM',
    'apparent_temperature': 'apparent_temperature_11PM',
})

In [21]:
# Drop the columns that are no longer needed once the data is split per hour.
# lat_long is kept on the 2AM subset only, so the merged frame carries it once.
# The columns= keyword replaces the positional axis argument (drop(labels, 1)),
# which is deprecated and no longer accepted by pandas 2.x.
sub2am = sub2am.drop(columns=['clock', 'time'])
sub11am = sub11am.drop(columns=['clock', 'time', 'lat_long'])
sub16am = sub16am.drop(columns=['clock', 'time', 'lat_long'])
sub23am = sub23am.drop(columns=['clock', 'time', 'lat_long'])

In [22]:
# join the four hourly subsets side by side, matching on date and coordinates
merge_keys = ['date', 'latitude', 'longitude']
df1 = pd.merge(sub2am, sub11am, how='inner', on=merge_keys)
df2 = pd.merge(df1, sub16am, how='inner', on=merge_keys)
final00_13 = pd.merge(df2, sub23am, how='inner', on=merge_keys)
final00_13.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,wind_4PM,dew_point_4PM,temperature_4PM,uvb_4PM,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM
0,-10.0,70.75,1.804003,-8.244635,-3.317542,0.015625,"(70.75, -10.0)",2010-05-01,-4.910634,-0.762843,...,0.654801,-7.59632,-3.208197,135464.53125,-4.959076,1.252327,-8.383887,-3.788763,6.859375,-5.343601
1,-10.0,70.75,1.017902,-8.951056,-3.666388,0.015625,"(70.75, -10.0)",2010-05-02,-5.071532,2.053218,...,4.696233,-8.682837,-2.695319,157502.078125,-4.178654,3.597318,-7.683447,-2.593787,89.03125,-4.327983
2,-10.0,70.75,2.878513,-8.265143,-2.668402,0.015625,"(70.75, -10.0)",2010-05-03,-4.260209,-0.356851,...,-1.858059,-8.166052,-2.532172,172075.078125,-4.149707,-4.73476,-7.03681,-1.539404,246.546875,-3.425562
3,-10.0,70.75,-4.627531,-6.45426,-1.165381,0.015625,"(70.75, -10.0)",2010-05-04,-3.174029,-7.889516,...,-8.841269,-2.512152,0.489526,148976.046875,-2.069854,-7.910963,-3.74903,-0.649023,458.84375,-3.083084
4,-10.0,70.75,-6.166454,-4.282019,-0.903296,0.015625,"(70.75, -10.0)",2010-05-05,-3.27034,-1.188801,...,-0.382734,-2.849554,0.480859,161864.390625,-2.05079,0.600817,-1.575079,0.810602,664.28125,-1.809304


In [23]:
#final0002.to_csv('data_00_02.csv')
#final0002 = pd.read_csv('data_00_02.csv', index_col=0)

# Merge Locations with weather data

In [24]:
# load the location lookup table; the first CSV column is used as the index
locations = pd.read_csv(filepath_or_buffer='locations1.csv', index_col=0)
locations.head()

Unnamed: 0,lat,lon,country,NUTS1,NUTS2,NUTS3
22,54.25,-10.0,IE,IE0,IE04,IE042
23,53.5,-10.0,IE,IE0,IE04,IE042
25,52.0,-10.0,IE,IE0,IE05,IE053
68,54.25,-9.25,IE,IE0,IE04,IE042
69,53.5,-9.25,IE,IE0,IE04,IE042


In [25]:
# store the location coordinates as strings so they can be matched against
# the string coordinates of the weather data
locations = locations.astype({'lat': str, 'lon': str})

In [26]:
# make sure the weather coordinates are strings as well
for coord_col in ('latitude', 'longitude'):
    final00_13[coord_col] = final00_13[coord_col].astype(str)

# left-join the location attributes onto the weather data, then remove the
# duplicated coordinate columns that came from the locations table
df_weather_13 = (
    final00_13
    .merge(locations, how='left', left_on=['latitude', 'longitude'], right_on=['lat', 'lon'])
    .drop(columns=['lat', 'lon'])
)
df_weather_13.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
0,-10.0,70.75,1.804003,-8.244635,-3.317542,0.015625,"(70.75, -10.0)",2010-05-01,-4.910634,-0.762843,...,-4.959076,1.252327,-8.383887,-3.788763,6.859375,-5.343601,,,,
1,-10.0,70.75,1.017902,-8.951056,-3.666388,0.015625,"(70.75, -10.0)",2010-05-02,-5.071532,2.053218,...,-4.178654,3.597318,-7.683447,-2.593787,89.03125,-4.327983,,,,
2,-10.0,70.75,2.878513,-8.265143,-2.668402,0.015625,"(70.75, -10.0)",2010-05-03,-4.260209,-0.356851,...,-4.149707,-4.73476,-7.03681,-1.539404,246.546875,-3.425562,,,,
3,-10.0,70.75,-4.627531,-6.45426,-1.165381,0.015625,"(70.75, -10.0)",2010-05-04,-3.174029,-7.889516,...,-2.069854,-7.910963,-3.74903,-0.649023,458.84375,-3.083084,,,,
4,-10.0,70.75,-6.166454,-4.282019,-0.903296,0.015625,"(70.75, -10.0)",2010-05-05,-3.27034,-1.188801,...,-2.05079,0.600817,-1.575079,0.810602,664.28125,-1.809304,,,,


If the country or any NUTS code is NaN, the coordinates most likely refer to a location on water.

In [27]:
# number of rows and columns before rows without a country are removed
df_weather_13.shape

(1520208, 28)

In [28]:
# rows without a country are treated as non-land grid points and removed
df_weather_13 = df_weather_13.dropna(axis='index', subset=['country'])

In [29]:
# shape after dropping rows without a country: fewer than half of the rows remain
df_weather_13.shape

(707472, 28)

In [30]:
# preview the first rows that remain after dropping rows without a country
df_weather_13.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
13464,-10.0,54.25,4.151213,5.874353,8.957574,0.015625,"(54.25, -10.0)",2010-05-01,6.778802,0.46031,...,8.57575,-2.928872,6.146844,9.043481,0.015625,6.914311,IE,IE0,IE04,IE042
13465,-10.0,54.25,-2.453366,5.724847,8.563593,0.015625,"(54.25, -10.0)",2010-05-02,6.360651,-1.615499,...,7.034265,-0.691849,3.297235,7.953516,0.015625,5.419132,IE,IE0,IE04,IE042
13466,-10.0,54.25,-1.325611,1.844263,7.287042,0.015625,"(54.25, -10.0)",2010-05-03,4.64236,-2.150167,...,7.615709,0.461049,3.712366,8.132288,0.015625,5.641353,IE,IE0,IE04,IE042
13467,-10.0,54.25,1.224225,3.492853,7.981287,0.015625,"(54.25, -10.0)",2010-05-04,5.467059,2.772763,...,7.849281,4.633374,8.100366,10.132593,0.015625,8.422721,IE,IE0,IE04,IE042
13468,-10.0,54.25,5.224984,9.108331,10.358209,0.015625,"(54.25, -10.0)",2010-05-05,8.912374,1.994057,...,10.325328,2.093152,9.807916,10.582117,0.015625,9.337411,IE,IE0,IE04,IE042


# Loading of MRT Dataset

In [31]:
# load the MRT data; the first CSV column is used as the index
rad_13 = pd.read_csv(filepath_or_buffer='rad_with_MRT_13.csv', index_col=0)

In [32]:
# store the MRT coordinates as strings so they match the weather table's merge keys
rad_13 = rad_13.astype({'latitude': str, 'longitude': str})

In [33]:
# inner-join the weather table with the MRT table on date and coordinates
mrt_merge_keys = ['date', 'latitude', 'longitude']
weather_final_13 = pd.merge(df_weather_13, rad_13, how='inner', on=mrt_merge_keys)

In [34]:
# preview the first rows of the merged weather + MRT table
weather_final_13.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM
0,-10.0,54.25,4.151213,5.874353,8.957574,0.015625,"(54.25, -10.0)",2010-05-01,6.778802,0.46031,...,0.015625,6.914311,IE,IE0,IE04,IE042,2.72,18.53,14.88,0.64
1,-10.0,54.25,-2.453366,5.724847,8.563593,0.015625,"(54.25, -10.0)",2010-05-02,6.360651,-1.615499,...,0.015625,5.419132,IE,IE0,IE04,IE042,1.68,17.26,17.8,3.33
2,-10.0,54.25,-1.325611,1.844263,7.287042,0.015625,"(54.25, -10.0)",2010-05-03,4.64236,-2.150167,...,0.015625,5.641353,IE,IE0,IE04,IE042,4.26,24.24,20.02,1.04
3,-10.0,54.25,1.224225,3.492853,7.981287,0.015625,"(54.25, -10.0)",2010-05-04,5.467059,2.772763,...,0.015625,8.422721,IE,IE0,IE04,IE042,0.91,22.56,24.24,8.53
4,-10.0,54.25,5.224984,9.108331,10.358209,0.015625,"(54.25, -10.0)",2010-05-05,8.912374,1.994057,...,0.015625,9.337411,IE,IE0,IE04,IE042,8.96,17.24,16.92,9.54


In [35]:
# Parse the date strings into datetimes and derive the ISO week number.
# Series.dt.week is deprecated and removed in pandas 2.0;
# dt.isocalendar().week is the replacement. It returns a nullable UInt32
# column, so it is cast to int64 to keep the dtype .dt.week used to produce.
# NOTE(review): the cast assumes 'date' has no missing values - confirm.
weather_final_13['date'] = pd.to_datetime(weather_final_13['date'])
weather_final_13['Week_Number'] = (
    weather_final_13['date'].dt.isocalendar().week.astype('int64')
)

  weather_final_13['Week_Number'] = weather_final_13['date'].dt.week


In [36]:
# inspect dtypes and non-null counts of the merged table
weather_final_13.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 707472 entries, 0 to 707471
Data columns (total 33 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   longitude                  707472 non-null  object        
 1   latitude                   707472 non-null  object        
 2   wind_2AM                   707472 non-null  float32       
 3   dew_point_2AM              707472 non-null  float64       
 4   temperature_2AM            707472 non-null  float64       
 5   uvb_2AM                    707472 non-null  float32       
 6   lat_long                   707472 non-null  object        
 7   date                       707472 non-null  datetime64[ns]
 8   apparent_temperature_2AM   707472 non-null  float64       
 9   wind_11AM                  707472 non-null  float32       
 10  dew_point_11AM             707472 non-null  float64       
 11  temperature_11AM           707472 non-null  float64 

In [37]:
# preview the first rows, now including the Week_Number column
weather_final_13.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,4.151213,5.874353,8.957574,0.015625,"(54.25, -10.0)",2010-05-01,6.778802,0.46031,...,6.914311,IE,IE0,IE04,IE042,2.72,18.53,14.88,0.64,17
1,-10.0,54.25,-2.453366,5.724847,8.563593,0.015625,"(54.25, -10.0)",2010-05-02,6.360651,-1.615499,...,5.419132,IE,IE0,IE04,IE042,1.68,17.26,17.8,3.33,17
2,-10.0,54.25,-1.325611,1.844263,7.287042,0.015625,"(54.25, -10.0)",2010-05-03,4.64236,-2.150167,...,5.641353,IE,IE0,IE04,IE042,4.26,24.24,20.02,1.04,18
3,-10.0,54.25,1.224225,3.492853,7.981287,0.015625,"(54.25, -10.0)",2010-05-04,5.467059,2.772763,...,8.422721,IE,IE0,IE04,IE042,0.91,22.56,24.24,8.53,18
4,-10.0,54.25,5.224984,9.108331,10.358209,0.015625,"(54.25, -10.0)",2010-05-05,8.912374,1.994057,...,9.337411,IE,IE0,IE04,IE042,8.96,17.24,16.92,9.54,18


In [38]:
# round all numeric columns to two decimal places and display the result
weather_final_13 = weather_final_13.round(2)
weather_final_13

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,4.15,5.87,8.96,0.02,"(54.25, -10.0)",2010-05-01,6.78,0.46,...,6.91,IE,IE0,IE04,IE042,2.72,18.53,14.88,0.64,17
1,-10.0,54.25,-2.45,5.72,8.56,0.02,"(54.25, -10.0)",2010-05-02,6.36,-1.62,...,5.42,IE,IE0,IE04,IE042,1.68,17.26,17.80,3.33,17
2,-10.0,54.25,-1.33,1.84,7.29,0.02,"(54.25, -10.0)",2010-05-03,4.64,-2.15,...,5.64,IE,IE0,IE04,IE042,4.26,24.24,20.02,1.04,18
3,-10.0,54.25,1.22,3.49,7.98,0.02,"(54.25, -10.0)",2010-05-04,5.47,2.77,...,8.42,IE,IE0,IE04,IE042,0.91,22.56,24.24,8.53,18
4,-10.0,54.25,5.22,9.11,10.36,0.02,"(54.25, -10.0)",2010-05-05,8.91,1.99,...,9.34,IE,IE0,IE04,IE042,8.96,17.24,16.92,9.54,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
707467,29.75,61.75,3.91,2.60,3.79,0.02,"(61.75, 29.75)",2013-09-26,1.22,1.80,...,2.38,FI,FI1,FI1C,FI1C5,1.06,11.69,4.54,3.02,39
707468,29.75,61.75,-1.54,3.91,4.42,0.02,"(61.75, 29.75)",2013-09-27,1.97,-4.25,...,2.10,FI,FI1,FI1C,FI1C5,3.92,10.65,1.09,0.88,39
707469,29.75,61.75,-1.06,3.88,4.36,0.02,"(61.75, 29.75)",2013-09-28,1.91,-0.48,...,1.47,FI,FI1,FI1C,FI1C5,-4.16,15.61,4.13,0.49,39
707470,29.75,61.75,-1.29,0.69,3.27,0.02,"(61.75, 29.75)",2013-09-29,0.60,-0.78,...,-0.41,FI,FI1,FI1C,FI1C5,-0.58,13.33,-4.02,-8.00,39


In [39]:
# remove fully identical rows (keeping the first occurrence), then check the shape
weather_final_13 = weather_final_13.drop_duplicates(subset=None, keep='first')
weather_final_13.shape

(707472, 33)

In [42]:
# write the final merged table to disk in CSV format
# NOTE(review): the file name has no '.csv' extension - confirm whether
# downstream code expects exactly this name before changing it.
weather_final_13.to_csv(path_or_buf='FINAL_WEATHER_MERGED_13')