# General Operations on weather data and merging with MRT

import cdsapi

c = cdsapi.Client()

c.retrieve(
    'reanalysis-era5-single-levels',
    {
        'product_type': 'reanalysis',
        'variable': [
            '10m_u_component_of_wind', '2m_dewpoint_temperature', '2m_temperature',
            'downward_uv_radiation_at_the_surface',
        ],
        'year': [
            '2002', '2003', '2004', '2005',
        ],
        'month': [
            '05', '06', '07',
            '08', '09',
        ],
        'day': [
            '01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
            '13', '14', '15',
            '16', '17', '18',
            '19', '20', '21',
            '22', '23', '24',
            '25', '26', '27',
            '28', '29', '30',
            '31',
        ],
        'time': [
            '02:00', '11:00', '16:00',
            '23:00',
        ],
        'area': [
            71.2, -10, 37,
            30,
        ],
        'format': 'netcdf',
    },
    'download02_05.nc')

In [1]:
# load required libraries
import xarray as xr
import numpy as np
import pandas as pd



In [2]:
# load raw dataset
ds = xr.open_dataset('C:/Users/benhu/MasterThesisRawData/download02_05.nc')
df_05 = ds.to_dataframe()

ecCodes library not found using ['eccodes', 'libeccodes.so', 'libeccodes']


In [3]:
# have latitude and longitude as columns
df_05 = df_05.reset_index(level=['longitude', 'latitude', 'time'])

In [4]:
df_05

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb
0,-10.0,71.0,2002-05-01 02:00:00,-3.760911,272.152954,272.262573,0.000000
1,-10.0,71.0,2002-05-01 11:00:00,-4.396626,272.055389,272.906952,97965.875000
2,-10.0,71.0,2002-05-01 16:00:00,-2.582623,271.833160,272.852844,94045.085938
3,-10.0,71.0,2002-05-01 23:00:00,-1.500735,271.546448,272.671844,33.734375
4,-10.0,71.0,2002-05-02 02:00:00,-0.694367,271.292816,272.395386,0.000000
...,...,...,...,...,...,...,...
53995531,30.0,37.0,2005-09-29 23:00:00,1.297454,275.996368,282.906311,0.000000
53995532,30.0,37.0,2005-09-30 02:00:00,0.861702,275.789398,282.331757,0.000000
53995533,30.0,37.0,2005-09-30 11:00:00,0.885151,278.294312,292.897797,305336.125000
53995534,30.0,37.0,2005-09-30 16:00:00,-0.648121,275.225311,291.947449,10561.171875


In [5]:
df_05.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53995536 entries, 0 to 53995535
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  float64       
 1   latitude   float64       
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), float64(2)
memory usage: 2.0 GB


In [6]:
#df['appTemp'] = df.apply(lambda row: -2.653+(0.994*df['t2m'])+(0.368*df['d2m']^2), axis=1)
#df.apply(lambda row: row.a + row.b, axis=1)

In [7]:
# specify the coordinates to keep according to European geography
lon = list(np.arange(-10, 31, 0.75))
lat = list(np.arange(37, 72, 0.75))

In [8]:
#lon = list(range(-10, 31))
#lat = list(range(37, 72))

In [9]:
# subset data to coordinates of Europe
df_05 = df_05[df_05['longitude'].isin(lon) & df_05['latitude'].isin(lat)]

In [10]:
df_05.shape

(6080832, 7)

In [11]:
# conver coordinates to string in order to from them to a tuple in a later step
df_05['latitude'] = df_05['latitude'].astype(str)
df_05['longitude'] = df_05['longitude'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['latitude'] = df_05['latitude'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['longitude'] = df_05['longitude'].astype(str)


In [12]:
df_05.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6080832 entries, 2448 to 53660159
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  object        
 1   latitude   object        
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), object(2)
memory usage: 278.4+ MB


In [13]:
# create unique column for each location by combining latitude and longitude
df_05['lat_long'] = df_05[['latitude', 'longitude']].apply(tuple, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['lat_long'] = df_05[['latitude', 'longitude']].apply(tuple, axis=1)


In [14]:
df_05.head()

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long
2448,-10.0,70.75,2002-05-01 02:00:00,-3.911372,272.504974,272.712158,0.0,"(70.75, -10.0)"
2449,-10.0,70.75,2002-05-01 11:00:00,-4.747702,272.112244,273.039764,115349.640625,"(70.75, -10.0)"
2450,-10.0,70.75,2002-05-01 16:00:00,-2.273233,272.112244,273.112579,101906.921875,"(70.75, -10.0)"
2451,-10.0,70.75,2002-05-01 23:00:00,-1.532651,271.786499,272.939423,6.75,"(70.75, -10.0)"
2452,-10.0,70.75,2002-05-02 02:00:00,-0.723678,271.531189,272.657074,0.0,"(70.75, -10.0)"


In [15]:
# extract date in separate column
df_05['date'] = df_05['time'].dt.strftime('%Y-%m-%d')
# extract time in separate column
df_05['clock'] = df_05['time'].dt.strftime('%H:%M:%S')
#subdf = subdf.drop(['time'], axis=1)
#subdf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['date'] = df_05['time'].dt.strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['clock'] = df_05['time'].dt.strftime('%H:%M:%S')


In [16]:
# get temperature in celsius
df_05['t2m'] = df_05['t2m']-273.15
df_05['d2m'] = df_05['d2m']-273.15

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['t2m'] = df_05['t2m']-273.15
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['d2m'] = df_05['d2m']-273.15


In [17]:
# get apparent temperature
df_05['apparent_temperature'] = -2.653+(0.994*df_05['t2m'])+(0.0153*df_05['d2m']**2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_05['apparent_temperature'] = -2.653+(0.994*df_05['t2m'])+(0.0153*df_05['d2m']**2)


In [18]:
df_05

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long,date,clock,apparent_temperature
2448,-10.0,70.75,2002-05-01 02:00:00,-3.911372,-0.645026,-0.437842,0.000000,"(70.75, -10.0)",2002-05-01,02:00:00,-3.081849
2449,-10.0,70.75,2002-05-01 11:00:00,-4.747702,-1.037756,-0.110236,115349.640625,"(70.75, -10.0)",2002-05-01,11:00:00,-2.746097
2450,-10.0,70.75,2002-05-01 16:00:00,-2.273233,-1.037756,-0.037421,101906.921875,"(70.75, -10.0)",2002-05-01,16:00:00,-2.673719
2451,-10.0,70.75,2002-05-01 23:00:00,-1.532651,-1.363501,-0.210577,6.750000,"(70.75, -10.0)",2002-05-01,23:00:00,-2.833869
2452,-10.0,70.75,2002-05-02 02:00:00,-0.723678,-1.618811,-0.492926,0.000000,"(70.75, -10.0)",2002-05-02,02:00:00,-3.102874
...,...,...,...,...,...,...,...,...,...,...,...
53660155,29.75,37.0,2005-09-29 23:00:00,-0.644865,3.012628,10.154749,0.000000,"(37.0, 29.75)",2005-09-29,23:00:00,7.579682
53660156,29.75,37.0,2005-09-30 02:00:00,-0.876744,2.994836,9.970789,0.000000,"(37.0, 29.75)",2005-09-30,02:00:00,7.395190
53660157,29.75,37.0,2005-09-30 11:00:00,1.966387,4.184076,20.209192,304465.562500,"(37.0, 29.75)",2005-09-30,11:00:00,17.702786
53660158,29.75,37.0,2005-09-30 16:00:00,1.417302,4.987360,17.578516,11053.796875,"(37.0, 29.75)",2005-09-30,16:00:00,15.200613


In [19]:
# subset data per hour
sub2am = df_05[df_05['clock'] == '02:00:00']
sub11am = df_05[df_05['clock'] == '11:00:00']
sub16am = df_05[df_05['clock'] == '16:00:00']
sub23am = df_05[df_05['clock'] == '23:00:00']

In [20]:
# rename colums to indicate hours
sub2am = sub2am.rename(columns={"t2m": "temperature_2AM", 'd2m': 'dew_point_2AM', 'uvb': 'uvb_2AM', 'u10': 'wind_2AM', "apparent_temperature": 'apparent_temperature_2AM'})
sub11am = sub11am.rename(columns={"t2m": "temperature_11AM", 'd2m': 'dew_point_11AM', 'uvb': 'uvb_11AM', 'u10': 'wind_11AM', "apparent_temperature": 'apparent_temperature_11AM'})
sub16am = sub16am.rename(columns={"t2m": "temperature_4PM", 'd2m': 'dew_point_4PM', 'uvb': 'uvb_4PM', 'u10': 'wind_4PM', "apparent_temperature": 'apparent_temperature_4PM'})
sub23am = sub23am.rename(columns={"t2m": "temperature_11PM", 'd2m': 'dew_point_11PM', 'uvb': 'uvb_11PM', 'u10': 'wind_11PM', "apparent_temperature": 'apparent_temperature_11PM'})

In [21]:
# drop irrelevant variables from subset
sub2am = sub2am.drop(['clock','time'],1)
sub11am = sub11am.drop(['clock','time','lat_long'],1)
sub16am = sub16am.drop(['clock','time','lat_long'],1)
sub23am = sub23am.drop(['clock','time','lat_long'],1)

In [22]:
# merging subsets of data per hour
df1 = sub2am.merge(sub11am, how='inner', on=['date','latitude', 'longitude'])
df2 = df1.merge(sub16am, how='inner', on=['date','latitude', 'longitude'])
final00_05 = df2.merge(sub23am, how='inner', on=['date','latitude', 'longitude'])
final00_05.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,wind_4PM,dew_point_4PM,temperature_4PM,uvb_4PM,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM
0,-10.0,70.75,-3.911372,-0.645026,-0.437842,0.0,"(70.75, -10.0)",2002-05-01,-3.081849,-4.747702,...,-2.273233,-1.037756,-0.037421,101906.921875,-2.673719,-1.532651,-1.363501,-0.210577,6.75,-2.833869
1,-10.0,70.75,-0.723678,-1.618811,-0.492926,0.0,"(70.75, -10.0)",2002-05-02,-3.102874,-0.421452,...,0.672811,-3.734351,-1.522955,66626.515625,-3.953453,1.326764,-3.587622,-1.522955,40.484375,-3.969891
2,-10.0,70.75,1.587955,-3.829382,-1.525916,0.0,"(70.75, -10.0)",2002-05-03,-3.945398,1.718875,...,0.624612,-6.384314,-1.623328,78584.59375,-3.642968,0.902737,-7.278357,-1.92533,94.46875,-3.756268
3,-10.0,70.75,0.308708,-7.906073,-2.071936,0.0,"(70.75, -10.0)",2002-05-04,-3.756163,-0.117273,...,-1.173107,-8.63642,-1.977484,172326.015625,-3.477427,-0.924944,-8.209753,-1.761053,485.875,-3.372266
4,-10.0,70.75,-1.006362,-7.862799,-1.833838,0.0,"(70.75, -10.0)",2002-05-05,-3.529934,-2.555266,...,-4.070952,-6.015326,-0.228278,122752.59375,-2.32629,-6.211931,-2.212592,1.041803,776.0625,-1.542546


In [23]:
#final0002.to_csv('data_00_02.csv')
#final0002 = pd.read_csv('data_00_02.csv', index_col=0)

# Merge Locations with weather data

In [24]:
# read locations data
locations = pd.read_csv('locations1.csv', index_col=0)
locations.head()

Unnamed: 0,lat,lon,country,NUTS1,NUTS2,NUTS3
22,54.25,-10.0,IE,IE0,IE04,IE042
23,53.5,-10.0,IE,IE0,IE04,IE042
25,52.0,-10.0,IE,IE0,IE05,IE053
68,54.25,-9.25,IE,IE0,IE04,IE042
69,53.5,-9.25,IE,IE0,IE04,IE042


In [25]:
# change coordinates to string
locations['lat'] = locations['lat'].astype(str)
locations['lon'] = locations['lon'].astype(str)

In [26]:
# be sure to also have coordinates on weather data as string
final00_05['latitude'] = final00_05['latitude'].astype(str)
final00_05['longitude'] = final00_05['longitude'].astype(str)
# merging locations with weather data
df_weather_05 = pd.merge(final00_05, locations,  how='left', left_on=['latitude','longitude'], right_on = ['lat','lon']).drop(['lat', 'lon'], axis = 1)
df_weather_05.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
0,-10.0,70.75,-3.911372,-0.645026,-0.437842,0.0,"(70.75, -10.0)",2002-05-01,-3.081849,-4.747702,...,-2.673719,-1.532651,-1.363501,-0.210577,6.75,-2.833869,,,,
1,-10.0,70.75,-0.723678,-1.618811,-0.492926,0.0,"(70.75, -10.0)",2002-05-02,-3.102874,-0.421452,...,-3.953453,1.326764,-3.587622,-1.522955,40.484375,-3.969891,,,,
2,-10.0,70.75,1.587955,-3.829382,-1.525916,0.0,"(70.75, -10.0)",2002-05-03,-3.945398,1.718875,...,-3.642968,0.902737,-7.278357,-1.92533,94.46875,-3.756268,,,,
3,-10.0,70.75,0.308708,-7.906073,-2.071936,0.0,"(70.75, -10.0)",2002-05-04,-3.756163,-0.117273,...,-3.477427,-0.924944,-8.209753,-1.761053,485.875,-3.372266,,,,
4,-10.0,70.75,-1.006362,-7.862799,-1.833838,0.0,"(70.75, -10.0)",2002-05-05,-3.529934,-2.555266,...,-2.32629,-6.211931,-2.212592,1.041803,776.0625,-1.542546,,,,


If country or any NUTS NaN it is likely that the coordinates indicate locations on water.

In [27]:
df_weather_05.shape

(1520208, 28)

In [28]:
# instances with country NaN are dropped as they do not indicate land area
df_weather_05 = df_weather_05.dropna(subset=['country'])

In [29]:
# the data is reduced to less than half
df_weather_05.shape

(707472, 28)

In [30]:
# show first instances
df_weather_05.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
13464,-10.0,54.25,8.9332,3.928766,7.624414,0.0,"(54.25, -10.0)",2002-05-01,5.161826,5.729873,...,7.561214,0.07292,4.624323,8.390802,0.0,6.014638,IE,IE0,IE04,IE042
13465,-10.0,54.25,1.894739,3.809137,7.993341,0.0,"(54.25, -10.0)",2002-05-02,5.514377,3.288623,...,8.266087,1.950755,5.07049,8.509821,0.0,6.199123,IE,IE0,IE04,IE042
13466,-10.0,54.25,2.513519,4.969568,7.822168,0.0,"(54.25, -10.0)",2002-05-03,5.500093,2.091446,...,8.700652,-2.141661,5.539575,8.168451,0.0,5.93595,IE,IE0,IE04,IE042
13467,-10.0,54.25,-1.916946,5.559106,7.500452,0.0,"(54.25, -10.0)",2002-05-04,5.275275,-0.978354,...,10.828993,0.830437,8.329492,10.414484,0.0,8.760517,IE,IE0,IE04,IE042
13468,-10.0,54.25,0.891664,8.834192,10.250177,0.0,"(54.25, -10.0)",2002-05-05,8.729733,1.533893,...,9.976339,-1.559356,6.662683,9.127649,0.0,7.099071,IE,IE0,IE04,IE042


# Loading of MRT Dataset

In [31]:
# loading MRT data for the given years
rad_05 = pd.read_csv('rad_with_MRT_05.csv', index_col=0)

In [32]:
# changing coordinates to string
rad_05['latitude'] = rad_05['latitude'].astype(str)
rad_05['longitude'] = rad_05['longitude'].astype(str)

In [33]:
# merge overall weather set with MRT set
weather_final_05 = df_weather_05.merge(rad_05, how='inner', on=['date','latitude', 'longitude'])

In [34]:
# show first instances
weather_final_05.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM
0,-10.0,54.25,8.9332,3.928766,7.624414,0.0,"(54.25, -10.0)",2002-05-01,5.161826,5.729873,...,0.0,6.014638,IE,IE0,IE04,IE042,-0.78,21.84,17.36,0.8
1,-10.0,54.25,1.894739,3.809137,7.993341,0.0,"(54.25, -10.0)",2002-05-02,5.514377,3.288623,...,0.0,6.199123,IE,IE0,IE04,IE042,0.93,14.58,16.11,-0.57
2,-10.0,54.25,2.513519,4.969568,7.822168,0.0,"(54.25, -10.0)",2002-05-03,5.500093,2.091446,...,0.0,5.93595,IE,IE0,IE04,IE042,-0.75,22.13,13.62,0.7
3,-10.0,54.25,-1.916946,5.559106,7.500452,0.0,"(54.25, -10.0)",2002-05-04,5.275275,-0.978354,...,0.0,8.760517,IE,IE0,IE04,IE042,-1.37,21.85,24.21,7.79
4,-10.0,54.25,0.891664,8.834192,10.250177,0.0,"(54.25, -10.0)",2002-05-05,8.729733,1.533893,...,0.0,7.099071,IE,IE0,IE04,IE042,8.47,24.21,18.95,2.19


In [35]:
# change date to datetime and get week number
weather_final_05['date'] =  pd.to_datetime(weather_final_05['date'])
weather_final_05['Week_Number'] = weather_final_05['date'].dt.week

  weather_final_05['Week_Number'] = weather_final_05['date'].dt.week


In [36]:
weather_final_05.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 707472 entries, 0 to 707471
Data columns (total 33 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   longitude                  707472 non-null  object        
 1   latitude                   707472 non-null  object        
 2   wind_2AM                   707472 non-null  float32       
 3   dew_point_2AM              707472 non-null  float64       
 4   temperature_2AM            707472 non-null  float64       
 5   uvb_2AM                    707472 non-null  float32       
 6   lat_long                   707472 non-null  object        
 7   date                       707472 non-null  datetime64[ns]
 8   apparent_temperature_2AM   707472 non-null  float64       
 9   wind_11AM                  707472 non-null  float32       
 10  dew_point_11AM             707472 non-null  float64       
 11  temperature_11AM           707472 non-null  float64 

In [37]:
weather_final_05.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,8.9332,3.928766,7.624414,0.0,"(54.25, -10.0)",2002-05-01,5.161826,5.729873,...,6.014638,IE,IE0,IE04,IE042,-0.78,21.84,17.36,0.8,18
1,-10.0,54.25,1.894739,3.809137,7.993341,0.0,"(54.25, -10.0)",2002-05-02,5.514377,3.288623,...,6.199123,IE,IE0,IE04,IE042,0.93,14.58,16.11,-0.57,18
2,-10.0,54.25,2.513519,4.969568,7.822168,0.0,"(54.25, -10.0)",2002-05-03,5.500093,2.091446,...,5.93595,IE,IE0,IE04,IE042,-0.75,22.13,13.62,0.7,18
3,-10.0,54.25,-1.916946,5.559106,7.500452,0.0,"(54.25, -10.0)",2002-05-04,5.275275,-0.978354,...,8.760517,IE,IE0,IE04,IE042,-1.37,21.85,24.21,7.79,18
4,-10.0,54.25,0.891664,8.834192,10.250177,0.0,"(54.25, -10.0)",2002-05-05,8.729733,1.533893,...,7.099071,IE,IE0,IE04,IE042,8.47,24.21,18.95,2.19,18


In [38]:
# round data to 2 decimals
weather_final_05 = weather_final_05.round(decimals=2)
weather_final_05

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,8.93,3.93,7.62,0.0,"(54.25, -10.0)",2002-05-01,5.16,5.73,...,6.01,IE,IE0,IE04,IE042,-0.78,21.84,17.36,0.80,18
1,-10.0,54.25,1.89,3.81,7.99,0.0,"(54.25, -10.0)",2002-05-02,5.51,3.29,...,6.20,IE,IE0,IE04,IE042,0.93,14.58,16.11,-0.57,18
2,-10.0,54.25,2.51,4.97,7.82,0.0,"(54.25, -10.0)",2002-05-03,5.50,2.09,...,5.94,IE,IE0,IE04,IE042,-0.75,22.13,13.62,0.70,18
3,-10.0,54.25,-1.92,5.56,7.50,0.0,"(54.25, -10.0)",2002-05-04,5.28,-0.98,...,8.76,IE,IE0,IE04,IE042,-1.37,21.85,24.21,7.79,18
4,-10.0,54.25,0.89,8.83,10.25,0.0,"(54.25, -10.0)",2002-05-05,8.73,1.53,...,7.10,IE,IE0,IE04,IE042,8.47,24.21,18.95,2.19,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
707467,29.75,61.75,0.40,7.01,7.90,0.0,"(61.75, 29.75)",2005-09-26,5.95,0.27,...,9.07,FI,FI1,FI1C,FI1C5,3.09,21.78,5.16,2.92,39
707468,29.75,61.75,1.31,8.99,10.03,0.0,"(61.75, 29.75)",2005-09-27,8.56,1.01,...,10.32,FI,FI1,FI1C,FI1C5,2.12,14.29,5.68,3.63,39
707469,29.75,61.75,0.44,10.06,11.19,0.0,"(61.75, 29.75)",2005-09-28,10.02,1.22,...,12.65,FI,FI1,FI1C,FI1C5,3.44,24.74,11.12,9.92,39
707470,29.75,61.75,0.21,10.94,12.80,0.0,"(61.75, 29.75)",2005-09-29,11.90,1.68,...,5.52,FI,FI1,FI1C,FI1C5,11.01,15.86,7.22,0.63,39


In [39]:
# drop possible duplicates
weather_final_05 = weather_final_05.drop_duplicates(keep='first')
weather_final_05.shape

(707472, 33)

In [42]:
# write data to csv
weather_final_05.to_csv('FINAL_WEATHER_MERGED_05')