# General Operations on weather data and merging with MRT

import cdsapi

c = cdsapi.Client()

c.retrieve(
    'reanalysis-era5-single-levels',
    {
        'product_type': 'reanalysis',
        'variable': [
            '10m_u_component_of_wind', '2m_dewpoint_temperature', '2m_temperature',
            'downward_uv_radiation_at_the_surface',
        ],
        'year': [
            '2014', '2015', '2016', '2017',
        ],
        'month': [
            '05', '06', '07',
            '08', '09',
        ],
        'day': [
            '01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
            '13', '14', '15',
            '16', '17', '18',
            '19', '20', '21',
            '22', '23', '24',
            '25', '26', '27',
            '28', '29', '30',
            '31',
        ],
        'time': [
            '02:00', '11:00', '16:00',
            '23:00',
        ],
        'area': [
            71.2, -10, 37,
            30,
        ],
        'format': 'netcdf',
    },
    'download14_17.nc')


In [1]:
import xarray as xr
import numpy as np
import pandas as pd



In [2]:
ds = xr.open_dataset('C:/Users/benhu/MasterThesisRawData/download14_17.nc')
df_17 = ds.to_dataframe()

ecCodes library not found using ['eccodes', 'libeccodes.so', 'libeccodes']


In [3]:
# have latitude and longitude as columns
df_17 = df_17.reset_index(level=['longitude', 'latitude', 'time'])

In [4]:
df_17

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb
0,-10.0,71.0,2014-05-01 02:00:00,6.314373,266.756409,270.979950,-0.015625
1,-10.0,71.0,2014-05-01 11:00:00,6.646193,267.242950,270.661011,162607.234375
2,-10.0,71.0,2014-05-01 16:00:00,6.546647,267.473114,271.106445,133448.968750
3,-10.0,71.0,2014-05-01 23:00:00,3.628547,267.014496,271.065186,60.625000
4,-10.0,71.0,2014-05-02 02:00:00,2.597991,266.676208,270.863556,-0.015625
...,...,...,...,...,...,...,...
53995531,30.0,37.0,2017-09-29 23:00:00,0.177620,279.015564,284.010315,-0.015625
53995532,30.0,37.0,2017-09-30 02:00:00,0.175706,279.350372,282.880219,-0.015625
53995533,30.0,37.0,2017-09-30 11:00:00,0.584099,279.273651,289.160309,188605.796875
53995534,30.0,37.0,2017-09-30 16:00:00,-0.291394,279.413147,288.725891,7127.875000


In [5]:
df_17.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53995536 entries, 0 to 53995535
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  float64       
 1   latitude   float64       
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), float64(2)
memory usage: 2.0 GB


In [6]:
#df['appTemp'] = df.apply(lambda row: -2.653+(0.994*df['t2m'])+(0.368*df['d2m']^2), axis=1)
#df.apply(lambda row: row.a + row.b, axis=1)

In [7]:
# specify the coordinates to keep according to European geography
lon = list(np.arange(-10, 31, 0.75))
lat = list(np.arange(37, 72, 0.75))

In [8]:
#lon = list(range(-10, 31))
#lat = list(range(37, 72))

In [9]:
# subset data to coordinates of Europe
df_17 = df_17[df_17['longitude'].isin(lon) & df_17['latitude'].isin(lat)]

In [10]:
df_17.shape

(6080832, 7)

In [11]:
# conver coordinates to string in order to from them to a tuple in a later step
df_17['latitude'] = df_17['latitude'].astype(str)
df_17['longitude'] = df_17['longitude'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['latitude'] = df_17['latitude'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['longitude'] = df_17['longitude'].astype(str)


In [12]:
df_17.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6080832 entries, 2448 to 53660159
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  object        
 1   latitude   object        
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), object(2)
memory usage: 278.4+ MB


In [13]:
# create unique column for each location by combining latitude and longitude
df_17['lat_long'] = df_17[['latitude', 'longitude']].apply(tuple, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['lat_long'] = df_17[['latitude', 'longitude']].apply(tuple, axis=1)


In [14]:
df_17.head()

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long
2448,-10.0,70.75,2014-05-01 02:00:00,6.593867,266.989197,271.095428,-0.015625,"(70.75, -10.0)"
2449,-10.0,70.75,2014-05-01 11:00:00,7.402997,267.461792,271.008362,159663.109375,"(70.75, -10.0)"
2450,-10.0,70.75,2014-05-01 16:00:00,7.620595,267.621338,271.667358,128571.28125,"(70.75, -10.0)"
2451,-10.0,70.75,2014-05-01 23:00:00,4.610606,267.467896,271.362152,13.46875,"(70.75, -10.0)"
2452,-10.0,70.75,2014-05-02 02:00:00,3.485609,266.752045,271.211823,-0.015625,"(70.75, -10.0)"


In [15]:
# extract date in separate column
df_17['date'] = df_17['time'].dt.strftime('%Y-%m-%d')
# extract time in separate column
df_17['clock'] = df_17['time'].dt.strftime('%H:%M:%S')
#subdf = subdf.drop(['time'], axis=1)
#subdf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['date'] = df_17['time'].dt.strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['clock'] = df_17['time'].dt.strftime('%H:%M:%S')


In [16]:
# get temperature in celsius
df_17['t2m'] = df_17['t2m']-273.15
df_17['d2m'] = df_17['d2m']-273.15

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['t2m'] = df_17['t2m']-273.15
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['d2m'] = df_17['d2m']-273.15


In [17]:
# get apparent temperature
df_17['apparent_temperature'] = -2.653+(0.994*df_17['t2m'])+(0.0153*df_17['d2m']**2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_17['apparent_temperature'] = -2.653+(0.994*df_17['t2m'])+(0.0153*df_17['d2m']**2)


In [18]:
df_17

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long,date,clock,apparent_temperature
2448,-10.0,70.75,2014-05-01 02:00:00,6.593867,-6.160803,-2.054572,-0.015625,"(70.75, -10.0)",2014-05-01,02:00:00,-4.114525
2449,-10.0,70.75,2014-05-01 11:00:00,7.402997,-5.688208,-2.141638,159663.109375,"(70.75, -10.0)",2014-05-01,11:00:00,-4.286746
2450,-10.0,70.75,2014-05-01 16:00:00,7.620595,-5.528662,-1.482642,128571.281250,"(70.75, -10.0)",2014-05-01,16:00:00,-3.659084
2451,-10.0,70.75,2014-05-01 23:00:00,4.610606,-5.682104,-1.787848,13.468750,"(70.75, -10.0)",2014-05-01,23:00:00,-3.936140
2452,-10.0,70.75,2014-05-02 02:00:00,3.485609,-6.397955,-1.938177,-0.015625,"(70.75, -10.0)",2014-05-02,02:00:00,-3.953261
...,...,...,...,...,...,...,...,...,...,...,...
53660155,29.75,37.0,2017-09-29 23:00:00,-0.046996,7.250146,12.242456,-0.015625,"(37.0, 29.75)",2017-09-29,23:00:00,10.320240
53660156,29.75,37.0,2017-09-30 02:00:00,-0.016367,6.653772,11.027124,-0.015625,"(37.0, 29.75)",2017-09-30,02:00:00,8.985333
53660157,29.75,37.0,2017-09-30 11:00:00,2.351678,8.224969,16.570337,216335.812500,"(37.0, 29.75)",2017-09-30,11:00:00,14.852962
53660158,29.75,37.0,2017-09-30 16:00:00,1.381105,9.491846,14.926050,7080.703125,"(37.0, 29.75)",2017-09-30,16:00:00,13.561949


In [19]:
# subset data per hour
sub2am = df_17[df_17['clock'] == '02:00:00']
sub11am = df_17[df_17['clock'] == '11:00:00']
sub16am = df_17[df_17['clock'] == '16:00:00']
sub23am = df_17[df_17['clock'] == '23:00:00']

In [20]:
# rename colums to indicate hours
sub2am = sub2am.rename(columns={"t2m": "temperature_2AM", 'd2m': 'dew_point_2AM', 'uvb': 'uvb_2AM', 'u10': 'wind_2AM', "apparent_temperature": 'apparent_temperature_2AM'})
sub11am = sub11am.rename(columns={"t2m": "temperature_11AM", 'd2m': 'dew_point_11AM', 'uvb': 'uvb_11AM', 'u10': 'wind_11AM', "apparent_temperature": 'apparent_temperature_11AM'})
sub16am = sub16am.rename(columns={"t2m": "temperature_4PM", 'd2m': 'dew_point_4PM', 'uvb': 'uvb_4PM', 'u10': 'wind_4PM', "apparent_temperature": 'apparent_temperature_4PM'})
sub23am = sub23am.rename(columns={"t2m": "temperature_11PM", 'd2m': 'dew_point_11PM', 'uvb': 'uvb_11PM', 'u10': 'wind_11PM', "apparent_temperature": 'apparent_temperature_11PM'})

In [21]:
# drop irrelevant variables from subset
sub2am = sub2am.drop(['clock','time'],1)
sub11am = sub11am.drop(['clock','time','lat_long'],1)
sub16am = sub16am.drop(['clock','time','lat_long'],1)
sub23am = sub23am.drop(['clock','time','lat_long'],1)

In [22]:
# merging subsets of data per hour
df1 = sub2am.merge(sub11am, how='inner', on=['date','latitude', 'longitude'])
df2 = df1.merge(sub16am, how='inner', on=['date','latitude', 'longitude'])
final00_17 = df2.merge(sub23am, how='inner', on=['date','latitude', 'longitude'])
final00_17.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,wind_4PM,dew_point_4PM,temperature_4PM,uvb_4PM,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM
0,-10.0,70.75,6.593867,-6.160803,-2.054572,-0.015625,"(70.75, -10.0)",2014-05-01,-4.114525,7.402997,...,7.620595,-5.528662,-1.482642,128571.28125,-3.659084,4.610606,-5.682104,-1.787848,13.46875,-3.93614
1,-10.0,70.75,3.485609,-6.397955,-1.938177,-0.015625,"(70.75, -10.0)",2014-05-02,-3.953261,1.256035,...,1.513833,-8.578619,-3.808813,82873.359375,-5.312992,2.620963,-9.044226,-4.131445,53.890625,-5.508147
2,-10.0,70.75,2.74029,-9.149725,-4.39173,-0.015625,"(70.75, -10.0)",2014-05-03,-5.737502,1.344095,...,0.852108,-10.317206,-4.415564,120163.351562,-5.413466,-0.620024,-10.780188,-4.60163,202.109375,-5.448969
3,-10.0,70.75,-0.658949,-10.687781,-4.406409,-0.015625,"(70.75, -10.0)",2014-05-04,-5.285272,-2.922343,...,-4.567404,-4.886053,-0.380072,126038.125,-2.665527,-3.86803,-2.998358,0.766534,397.484375,-1.753515
4,-10.0,70.75,-4.719275,-2.511816,1.042413,-0.015625,"(70.75, -10.0)",2014-05-05,-1.52031,-7.91687,...,-8.733019,-0.72962,2.134912,78433.59375,-0.522752,-8.733019,0.180658,2.513452,397.484375,-0.154129


In [23]:
#final0002.to_csv('data_00_02.csv')
#final0002 = pd.read_csv('data_00_02.csv', index_col=0)

# Merge Locations with weather data

In [24]:
# read locations data
locations = pd.read_csv('locations1.csv', index_col=0)
locations.head()

Unnamed: 0,lat,lon,country,NUTS1,NUTS2,NUTS3
22,54.25,-10.0,IE,IE0,IE04,IE042
23,53.5,-10.0,IE,IE0,IE04,IE042
25,52.0,-10.0,IE,IE0,IE05,IE053
68,54.25,-9.25,IE,IE0,IE04,IE042
69,53.5,-9.25,IE,IE0,IE04,IE042


In [25]:
# change coordinates to string
locations['lat'] = locations['lat'].astype(str)
locations['lon'] = locations['lon'].astype(str)

In [26]:
# be sure to also have coordinates on weather data as string
final00_17['latitude'] = final00_17['latitude'].astype(str)
final00_17['longitude'] = final00_17['longitude'].astype(str)
# merging locations with weather data
df_weather_17 = pd.merge(final00_17, locations,  how='left', left_on=['latitude','longitude'], right_on = ['lat','lon']).drop(['lat', 'lon'], axis = 1)
df_weather_17.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
0,-10.0,70.75,6.593867,-6.160803,-2.054572,-0.015625,"(70.75, -10.0)",2014-05-01,-4.114525,7.402997,...,-3.659084,4.610606,-5.682104,-1.787848,13.46875,-3.93614,,,,
1,-10.0,70.75,3.485609,-6.397955,-1.938177,-0.015625,"(70.75, -10.0)",2014-05-02,-3.953261,1.256035,...,-5.312992,2.620963,-9.044226,-4.131445,53.890625,-5.508147,,,,
2,-10.0,70.75,2.74029,-9.149725,-4.39173,-0.015625,"(70.75, -10.0)",2014-05-03,-5.737502,1.344095,...,-5.413466,-0.620024,-10.780188,-4.60163,202.109375,-5.448969,,,,
3,-10.0,70.75,-0.658949,-10.687781,-4.406409,-0.015625,"(70.75, -10.0)",2014-05-04,-5.285272,-2.922343,...,-2.665527,-3.86803,-2.998358,0.766534,397.484375,-1.753515,,,,
4,-10.0,70.75,-4.719275,-2.511816,1.042413,-0.015625,"(70.75, -10.0)",2014-05-05,-1.52031,-7.91687,...,-0.522752,-8.733019,0.180658,2.513452,397.484375,-0.154129,,,,


If country or any NUTS NaN it is likely that the coordinates indicate locations on water.

In [27]:
df_weather_17.shape

(1520208, 28)

In [28]:
# instances with country NaN are dropped as they do not indicate land area
df_weather_17 = df_weather_17.dropna(subset=['country'])

In [29]:
# the data is reduced to less than half
df_weather_17.shape

(707472, 28)

In [30]:
# show first instances
df_weather_17.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
13464,-10.0,54.25,1.384934,9.278253,10.702661,-0.015625,"(54.25, -10.0)",2014-05-01,9.302561,0.690027,...,10.906797,-1.568263,9.669733,10.435052,-0.015625,9.150049,IE,IE0,IE04,IE042
13465,-10.0,54.25,-2.261256,9.393335,10.416711,-0.015625,"(54.25, -10.0)",2014-05-02,9.051203,-0.233326,...,11.255604,-1.46106,9.236383,11.11236,-0.015625,9.69794,IE,IE0,IE04,IE042
13466,-10.0,54.25,-1.661428,9.533716,11.141687,-0.015625,"(54.25, -10.0)",2014-05-03,9.81248,0.665778,...,12.520048,0.941444,10.589594,11.783258,-0.015625,10.775293,IE,IE0,IE04,IE042
13467,-10.0,54.25,1.102887,10.43963,11.556879,-0.015625,"(54.25, -10.0)",2014-05-04,10.502021,1.434707,...,11.098565,-1.029694,9.196283,11.553217,-0.015625,10.124843,IE,IE0,IE04,IE042
13468,-10.0,54.25,-2.80812,8.61297,10.950128,-0.015625,"(54.25, -10.0)",2014-05-05,9.366431,-3.578963,...,9.378821,4.089266,7.116754,9.693567,-0.015625,7.757323,IE,IE0,IE04,IE042


# Loading of MRT Dataset

In [31]:
# loading MRT data for the given years
rad_17 = pd.read_csv('rad_with_MRT_17.csv', index_col=0)

In [32]:
# changing coordinates to string
rad_17['latitude'] = rad_17['latitude'].astype(str)
rad_17['longitude'] = rad_17['longitude'].astype(str)

In [33]:
# merge overall weather set with MRT set
weather_final_17 = df_weather_17.merge(rad_17, how='inner', on=['date','latitude', 'longitude'])

In [34]:
# show first instances
weather_final_17.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM
0,-10.0,54.25,1.384934,9.278253,10.702661,-0.015625,"(54.25, -10.0)",2014-05-01,9.302561,0.690027,...,-0.015625,9.150049,IE,IE0,IE04,IE042,8.11,15.9,14.88,5.34
1,-10.0,54.25,-2.261256,9.393335,10.416711,-0.015625,"(54.25, -10.0)",2014-05-02,9.051203,-0.233326,...,-0.015625,9.69794,IE,IE0,IE04,IE042,5.72,29.11,22.26,9.45
2,-10.0,54.25,-1.661428,9.533716,11.141687,-0.015625,"(54.25, -10.0)",2014-05-03,9.81248,0.665778,...,-0.015625,10.775293,IE,IE0,IE04,IE042,9.91,18.22,27.65,8.73
3,-10.0,54.25,1.102887,10.43963,11.556879,-0.015625,"(54.25, -10.0)",2014-05-04,10.502021,1.434707,...,-0.015625,10.124843,IE,IE0,IE04,IE042,9.05,23.97,25.09,9.29
4,-10.0,54.25,-2.80812,8.61297,10.950128,-0.015625,"(54.25, -10.0)",2014-05-05,9.366431,-3.578963,...,-0.015625,7.757323,IE,IE0,IE04,IE042,9.6,25.78,21.36,3.61


In [35]:
# change date to datetime and get week number
weather_final_17['date'] =  pd.to_datetime(weather_final_17['date'])
weather_final_17['Week_Number'] = weather_final_17['date'].dt.week

  weather_final_17['Week_Number'] = weather_final_17['date'].dt.week


In [36]:
weather_final_17.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 707472 entries, 0 to 707471
Data columns (total 33 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   longitude                  707472 non-null  object        
 1   latitude                   707472 non-null  object        
 2   wind_2AM                   707472 non-null  float32       
 3   dew_point_2AM              707472 non-null  float64       
 4   temperature_2AM            707472 non-null  float64       
 5   uvb_2AM                    707472 non-null  float32       
 6   lat_long                   707472 non-null  object        
 7   date                       707472 non-null  datetime64[ns]
 8   apparent_temperature_2AM   707472 non-null  float64       
 9   wind_11AM                  707472 non-null  float32       
 10  dew_point_11AM             707472 non-null  float64       
 11  temperature_11AM           707472 non-null  float64 

In [37]:
weather_final_17.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,1.384934,9.278253,10.702661,-0.015625,"(54.25, -10.0)",2014-05-01,9.302561,0.690027,...,9.150049,IE,IE0,IE04,IE042,8.11,15.9,14.88,5.34,18
1,-10.0,54.25,-2.261256,9.393335,10.416711,-0.015625,"(54.25, -10.0)",2014-05-02,9.051203,-0.233326,...,9.69794,IE,IE0,IE04,IE042,5.72,29.11,22.26,9.45,18
2,-10.0,54.25,-1.661428,9.533716,11.141687,-0.015625,"(54.25, -10.0)",2014-05-03,9.81248,0.665778,...,10.775293,IE,IE0,IE04,IE042,9.91,18.22,27.65,8.73,18
3,-10.0,54.25,1.102887,10.43963,11.556879,-0.015625,"(54.25, -10.0)",2014-05-04,10.502021,1.434707,...,10.124843,IE,IE0,IE04,IE042,9.05,23.97,25.09,9.29,18
4,-10.0,54.25,-2.80812,8.61297,10.950128,-0.015625,"(54.25, -10.0)",2014-05-05,9.366431,-3.578963,...,7.757323,IE,IE0,IE04,IE042,9.6,25.78,21.36,3.61,19


In [38]:
# round data to 2 decimals
weather_final_17 = weather_final_17.round(decimals=2)
weather_final_17

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,1.38,9.28,10.70,-0.02,"(54.25, -10.0)",2014-05-01,9.30,0.69,...,9.15,IE,IE0,IE04,IE042,8.11,15.90,14.88,5.34,18
1,-10.0,54.25,-2.26,9.39,10.42,-0.02,"(54.25, -10.0)",2014-05-02,9.05,-0.23,...,9.70,IE,IE0,IE04,IE042,5.72,29.11,22.26,9.45,18
2,-10.0,54.25,-1.66,9.53,11.14,-0.02,"(54.25, -10.0)",2014-05-03,9.81,0.67,...,10.78,IE,IE0,IE04,IE042,9.91,18.22,27.65,8.73,18
3,-10.0,54.25,1.10,10.44,11.56,-0.02,"(54.25, -10.0)",2014-05-04,10.50,1.43,...,10.12,IE,IE0,IE04,IE042,9.05,23.97,25.09,9.29,18
4,-10.0,54.25,-2.81,8.61,10.95,-0.02,"(54.25, -10.0)",2014-05-05,9.37,-3.58,...,7.76,IE,IE0,IE04,IE042,9.60,25.78,21.36,3.61,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
707467,29.75,61.75,-0.82,8.65,9.16,-0.02,"(61.75, 29.75)",2017-09-26,7.60,-0.30,...,6.68,FI,FI1,FI1C,FI1C5,9.35,20.28,9.97,8.78,39
707468,29.75,61.75,-0.44,7.37,7.95,-0.02,"(61.75, 29.75)",2017-09-27,6.08,0.08,...,4.28,FI,FI1,FI1C,FI1C5,7.88,20.39,7.75,5.87,39
707469,29.75,61.75,1.42,5.47,5.90,-0.02,"(61.75, 29.75)",2017-09-28,3.67,0.97,...,3.55,FI,FI1,FI1C,FI1C5,6.43,19.28,8.27,6.70,39
707470,29.75,61.75,-0.01,5.20,5.83,-0.02,"(61.75, 29.75)",2017-09-29,3.56,-0.79,...,4.09,FI,FI1,FI1C,FI1C5,7.16,15.75,7.45,6.53,39


In [39]:
# drop possible duplicates
weather_final_17 = weather_final_17.drop_duplicates(keep='first')
weather_final_17.shape

(707472, 33)

In [42]:
# write data to csv
weather_final_17.to_csv('FINAL_WEATHER_MERGED_17')