# General Operations on weather data and merging with MRT

In [1]:
# load required libraries
import xarray as xr
import numpy as np
import pandas as pd



import cdsapi

c = cdsapi.Client()

c.retrieve(
    'reanalysis-era5-single-levels',
    {
        'product_type': 'reanalysis',
        'variable': [
            '10m_u_component_of_wind', '2m_dewpoint_temperature', '2m_temperature',
            'downward_uv_radiation_at_the_surface',
        ],
        'year': [
            '2000', '2001',
        ],
        'month': [
            '05', '06', '07',
            '08', '09',
        ],
        'day': [
            '01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
            '13', '14', '15',
            '16', '17', '18',
            '19', '20', '21',
            '22', '23', '24',
            '25', '26', '27',
            '28', '29', '30',
            '31',
        ],
        'time': [
            '02:00', '11:00', '16:00',
            '23:00',
        ],
        'area': [
            71.2, -10, 37,
            30,
        ],
        'format': 'netcdf',
    },
    'download00_02.nc')

In [2]:
# load raw dataset
ds = xr.open_dataset('C:/Users/benhu/MasterThesisRawData/download00_02.nc')
df_01 = ds.to_dataframe()

ecCodes library not found using ['eccodes', 'libeccodes.so', 'libeccodes']


In [4]:
# have latitude and longitude as columns
df_01 = df_01.reset_index(level=['longitude', 'latitude', 'time'])

In [5]:
df_01

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb
0,-10.0,71.0,2000-05-01 02:00:00,1.203314,272.521851,272.965027,-0.015625
1,-10.0,71.0,2000-05-01 11:00:00,7.669049,272.755676,273.362762,115962.546875
2,-10.0,71.0,2000-05-01 16:00:00,5.220461,271.288910,272.013092,118450.382812
3,-10.0,71.0,2000-05-01 23:00:00,-2.342635,271.047455,271.893860,46.937500
4,-10.0,71.0,2000-05-02 02:00:00,-5.665585,270.957275,271.996887,-0.015625
...,...,...,...,...,...,...,...
26997763,30.0,37.0,2001-09-29 23:00:00,1.541593,273.760315,284.819305,-0.015625
26997764,30.0,37.0,2001-09-30 02:00:00,1.092444,272.403900,283.421936,-0.015625
26997765,30.0,37.0,2001-09-30 11:00:00,0.233202,273.138672,292.945984,316873.562500
26997766,30.0,37.0,2001-09-30 16:00:00,-1.252204,274.026062,289.272766,10849.890625


In [6]:
df_01.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26997768 entries, 0 to 26997767
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  float64       
 1   latitude   float64       
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), float64(2)
memory usage: 1.0 GB


In [7]:
#df['appTemp'] = df.apply(lambda row: -2.653+(0.994*df['t2m'])+(0.368*df['d2m']^2), axis=1)
#df.apply(lambda row: row.a + row.b, axis=1)

In [8]:
# specify the coordinates to keep according to European geography
lon = list(np.arange(-10, 31, 0.75))
lat = list(np.arange(37, 72, 0.75))

In [9]:
#lon = list(range(-10, 31))
#lat = list(range(37, 72))

In [10]:
# subset data to coordinates of Europe
df_01 = df_01[df_01['longitude'].isin(lon) & df_01['latitude'].isin(lat)]

In [11]:
df_01.shape

(3040416, 7)

In [13]:
# conver coordinates to string in order to from them to a tuple in a later step
df_01['latitude'] = df_01['latitude'].astype(str)
df_01['longitude'] = df_01['longitude'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['latitude'] = df_01['latitude'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['longitude'] = df_01['longitude'].astype(str)


In [14]:
df_01.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3040416 entries, 1224 to 26830079
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   longitude  object        
 1   latitude   object        
 2   time       datetime64[ns]
 3   u10        float32       
 4   d2m        float32       
 5   t2m        float32       
 6   uvb        float32       
dtypes: datetime64[ns](1), float32(4), object(2)
memory usage: 139.2+ MB


In [15]:
# create unique column for each location by combining latitude and longitude
df_01['lat_long'] = df_01[['latitude', 'longitude']].apply(tuple, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['lat_long'] = df_01[['latitude', 'longitude']].apply(tuple, axis=1)


In [16]:
df_01.head()

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long
1224,-10.0,70.75,2000-05-01 02:00:00,0.655893,272.642578,273.097595,-0.015625,"(70.75, -10.0)"
1225,-10.0,70.75,2000-05-01 11:00:00,7.269666,272.875,273.460083,118021.210938,"(70.75, -10.0)"
1226,-10.0,70.75,2000-05-01 16:00:00,4.387676,271.962616,272.667419,116230.78125,"(70.75, -10.0)"
1227,-10.0,70.75,2000-05-01 23:00:00,-3.255423,272.084045,272.538666,13.40625,"(70.75, -10.0)"
1228,-10.0,70.75,2000-05-02 02:00:00,-5.847638,272.002167,272.545349,-0.015625,"(70.75, -10.0)"


In [17]:
# extract date in separate column
df_01['date'] = df_01['time'].dt.strftime('%Y-%m-%d')
# extract time in separate column
df_01['clock'] = df_01['time'].dt.strftime('%H:%M:%S')
#subdf = subdf.drop(['time'], axis=1)
#subdf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['date'] = df_01['time'].dt.strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['clock'] = df_01['time'].dt.strftime('%H:%M:%S')


In [18]:
# get temperature in celsius
df_01['t2m'] = df_01['t2m']-273.15
df_01['d2m'] = df_01['d2m']-273.15

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['t2m'] = df_01['t2m']-273.15
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['d2m'] = df_01['d2m']-273.15


In [19]:
# get apparent temperature
df_01['apparent_temperature'] = -2.653+(0.994*df_01['t2m'])+(0.0153*df_01['d2m']**2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_01['apparent_temperature'] = -2.653+(0.994*df_01['t2m'])+(0.0153*df_01['d2m']**2)


In [20]:
df_01

Unnamed: 0,longitude,latitude,time,u10,d2m,t2m,uvb,lat_long,date,clock,apparent_temperature
1224,-10.0,70.75,2000-05-01 02:00:00,0.655893,-0.507422,-0.052405,-0.015625,"(70.75, -10.0)",2000-05-01,02:00:00,-2.701151
1225,-10.0,70.75,2000-05-01 11:00:00,7.269666,-0.275000,0.310083,118021.210938,"(70.75, -10.0)",2000-05-01,11:00:00,-2.343620
1226,-10.0,70.75,2000-05-01 16:00:00,4.387676,-1.187384,-0.482581,116230.781250,"(70.75, -10.0)",2000-05-01,16:00:00,-3.111114
1227,-10.0,70.75,2000-05-01 23:00:00,-3.255423,-1.065955,-0.611334,13.406250,"(70.75, -10.0)",2000-05-01,23:00:00,-3.243281
1228,-10.0,70.75,2000-05-02 02:00:00,-5.847638,-1.147833,-0.604651,-0.015625,"(70.75, -10.0)",2000-05-02,02:00:00,-3.233865
...,...,...,...,...,...,...,...,...,...,...,...
26830075,29.75,37.0,2001-09-29 23:00:00,-0.056572,0.080927,13.325159,-0.015625,"(37.0, 29.75)",2001-09-29,23:00:00,10.592308
26830076,29.75,37.0,2001-09-30 02:00:00,-0.571236,-1.134650,12.178644,-0.015625,"(37.0, 29.75)",2001-09-30,02:00:00,9.472270
26830077,29.75,37.0,2001-09-30 11:00:00,0.069417,0.148920,20.403558,314560.093750,"(37.0, 29.75)",2001-09-30,11:00:00,17.628476
26830078,29.75,37.0,2001-09-30 16:00:00,-1.068261,0.829584,16.955469,11326.000000,"(37.0, 29.75)",2001-09-30,16:00:00,14.211266


In [21]:
# subset data per hour
sub2am = df_01[df_01['clock'] == '02:00:00']
sub11am = df_01[df_01['clock'] == '11:00:00']
sub16am = df_01[df_01['clock'] == '16:00:00']
sub23am = df_01[df_01['clock'] == '23:00:00']

In [22]:
# rename colums to indicate hours
sub2am = sub2am.rename(columns={"t2m": "temperature_2AM", 'd2m': 'dew_point_2AM', 'uvb': 'uvb_2AM', 'u10': 'wind_2AM', "apparent_temperature": 'apparent_temperature_2AM'})
sub11am = sub11am.rename(columns={"t2m": "temperature_11AM", 'd2m': 'dew_point_11AM', 'uvb': 'uvb_11AM', 'u10': 'wind_11AM', "apparent_temperature": 'apparent_temperature_11AM'})
sub16am = sub16am.rename(columns={"t2m": "temperature_4PM", 'd2m': 'dew_point_4PM', 'uvb': 'uvb_4PM', 'u10': 'wind_4PM', "apparent_temperature": 'apparent_temperature_4PM'})
sub23am = sub23am.rename(columns={"t2m": "temperature_11PM", 'd2m': 'dew_point_11PM', 'uvb': 'uvb_11PM', 'u10': 'wind_11PM', "apparent_temperature": 'apparent_temperature_11PM'})

In [23]:
# drop irrelevant variables from subset
sub2am = sub2am.drop(['clock','time'],1)
sub11am = sub11am.drop(['clock','time','lat_long'],1)
sub16am = sub16am.drop(['clock','time','lat_long'],1)
sub23am = sub23am.drop(['clock','time','lat_long'],1)

In [24]:
# merging subsets of data per hour
df1 = sub2am.merge(sub11am, how='inner', on=['date','latitude', 'longitude'])
df2 = df1.merge(sub16am, how='inner', on=['date','latitude', 'longitude'])
final00_01 = df2.merge(sub23am, how='inner', on=['date','latitude', 'longitude'])
final00_01.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,wind_4PM,dew_point_4PM,temperature_4PM,uvb_4PM,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM
0,-10.0,70.75,0.655893,-0.507422,-0.052405,-0.015625,"(70.75, -10.0)",2000-05-01,-2.701151,7.269666,...,4.387676,-1.187384,-0.482581,116230.78125,-3.111114,-3.255423,-1.065955,-0.611334,13.40625,-3.243281
1,-10.0,70.75,-5.847638,-1.147833,-0.604651,-0.015625,"(70.75, -10.0)",2000-05-02,-3.233865,-2.127825,...,2.012161,-4.879218,-2.13938,144294.34375,-4.4153,1.02693,-4.456,-1.934302,73.75,-4.2719
2,-10.0,70.75,3.025739,-2.495245,-0.969031,-0.015625,"(70.75, -10.0)",2000-05-03,-3.520955,2.264138,...,-2.313028,-0.579596,0.077325,125873.648438,-2.570999,4.331611,0.249536,1.028314,46.9375,-1.629903
3,-10.0,70.75,7.728264,-0.142462,1.280115,-0.015625,"(70.75, -10.0)",2000-05-04,-1.380255,6.659881,...,12.233617,-4.493475,-2.335883,95617.296875,-4.66594,7.55251,-1.985968,-0.448218,341.984375,-3.038184
4,-10.0,70.75,4.45004,-2.300269,-0.657111,-0.015625,"(70.75, -10.0)",2000-05-05,-3.225212,-2.0705,...,-5.312817,-4.16181,-0.833594,137615.421875,-3.216586,-6.777434,-2.347449,-0.099127,630.328125,-2.667222


# Merge Locations with weather data

In [26]:
# read locations data
locations = pd.read_csv('locations1.csv', index_col=0)
locations.head()

Unnamed: 0,lat,lon,country,NUTS1,NUTS2,NUTS3
22,54.25,-10.0,IE,IE0,IE04,IE042
23,53.5,-10.0,IE,IE0,IE04,IE042
25,52.0,-10.0,IE,IE0,IE05,IE053
68,54.25,-9.25,IE,IE0,IE04,IE042
69,53.5,-9.25,IE,IE0,IE04,IE042


In [27]:
# change coordinates to string
locations['lat'] = locations['lat'].astype(str)
locations['lon'] = locations['lon'].astype(str)

In [28]:
# be sure to also have coordinates on weather data as string
final00_01['latitude'] = final00_01['latitude'].astype(str)
final00_01['longitude'] = final00_01['longitude'].astype(str)
# merging locations with weather data
df_weather_01 = pd.merge(final00_01, locations,  how='left', left_on=['latitude','longitude'], right_on = ['lat','lon']).drop(['lat', 'lon'], axis = 1)
df_weather_01.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
0,-10.0,70.75,0.655893,-0.507422,-0.052405,-0.015625,"(70.75, -10.0)",2000-05-01,-2.701151,7.269666,...,-3.111114,-3.255423,-1.065955,-0.611334,13.40625,-3.243281,,,,
1,-10.0,70.75,-5.847638,-1.147833,-0.604651,-0.015625,"(70.75, -10.0)",2000-05-02,-3.233865,-2.127825,...,-4.4153,1.02693,-4.456,-1.934302,73.75,-4.2719,,,,
2,-10.0,70.75,3.025739,-2.495245,-0.969031,-0.015625,"(70.75, -10.0)",2000-05-03,-3.520955,2.264138,...,-2.570999,4.331611,0.249536,1.028314,46.9375,-1.629903,,,,
3,-10.0,70.75,7.728264,-0.142462,1.280115,-0.015625,"(70.75, -10.0)",2000-05-04,-1.380255,6.659881,...,-4.66594,7.55251,-1.985968,-0.448218,341.984375,-3.038184,,,,
4,-10.0,70.75,4.45004,-2.300269,-0.657111,-0.015625,"(70.75, -10.0)",2000-05-05,-3.225212,-2.0705,...,-3.216586,-6.777434,-2.347449,-0.099127,630.328125,-2.667222,,,,


If country or any NUTS NaN it is likely that the coordinates indicate locations on water.

In [29]:
df_weather_01.shape

(760104, 28)

In [30]:
# instances with country NaN are dropped as they do not indicate land area
df_weather_01 = df_weather_01.dropna(subset=['country'])

In [31]:
# the data is reduced to less than half
df_weather_01.shape

(353736, 28)

In [32]:
# show first instances
df_weather_01.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_4PM,wind_11PM,dew_point_11PM,temperature_11PM,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3
6732,-10.0,54.25,-3.165971,7.574976,10.66015,-0.015625,"(54.25, -10.0)",2000-05-01,8.821107,0.163278,...,12.553876,-1.938212,8.068964,10.319635,-0.015625,8.600872,IE,IE0,IE04,IE042
6733,-10.0,54.25,-1.199289,8.215356,9.807397,-0.015625,"(54.25, -10.0)",2000-05-02,8.128182,-1.531899,...,9.440716,-3.546457,7.472284,8.993799,-0.015625,7.141112,IE,IE0,IE04,IE042
6734,-10.0,54.25,-4.2879,6.965784,8.517816,-0.015625,"(54.25, -10.0)",2000-05-03,6.556098,-4.149942,...,9.499821,-3.890405,6.274744,8.534052,-0.015625,6.432245,IE,IE0,IE04,IE042
6735,-10.0,54.25,-4.348374,6.377405,8.555994,-0.015625,"(54.25, -10.0)",2000-05-04,6.473928,-4.595942,...,10.721601,-5.206356,7.957941,9.555597,-0.015625,7.814194,IE,IE0,IE04,IE042
6736,-10.0,54.25,-3.986157,7.328668,9.033838,-0.015625,"(54.25, -10.0)",2000-05-05,7.148388,-3.513699,...,11.483983,-1.806554,8.463068,10.477014,-0.015625,8.856992,IE,IE0,IE04,IE042


# Loading of MRT Dataset

In [33]:
# loading MRT data for the given years
rad_01 = pd.read_csv('rad_with_MRT_01.csv', index_col=0)

In [34]:
# changing coordinates to string
rad_01['latitude'] = rad_01['latitude'].astype(str)
rad_01['longitude'] = rad_01['longitude'].astype(str)

In [35]:
# merge overall weather set with MRT set
weather_final_01 = df_weather_01.merge(rad_01, how='inner', on=['date','latitude', 'longitude'])

In [36]:
# show first instances
weather_final_01.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,uvb_11PM,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM
0,-10.0,54.25,-3.165971,7.574976,10.66015,-0.015625,"(54.25, -10.0)",2000-05-01,8.821107,0.163278,...,-0.015625,8.600872,IE,IE0,IE04,IE042,2.96,23.1,15.57,3.84
1,-10.0,54.25,-1.199289,8.215356,9.807397,-0.015625,"(54.25, -10.0)",2000-05-02,8.128182,-1.531899,...,-0.015625,7.141112,IE,IE0,IE04,IE042,3.03,23.49,25.09,5.6
2,-10.0,54.25,-4.2879,6.965784,8.517816,-0.015625,"(54.25, -10.0)",2000-05-03,6.556098,-4.149942,...,-0.015625,6.432245,IE,IE0,IE04,IE042,3.64,27.86,23.63,0.11
3,-10.0,54.25,-4.348374,6.377405,8.555994,-0.015625,"(54.25, -10.0)",2000-05-04,6.473928,-4.595942,...,-0.015625,7.814194,IE,IE0,IE04,IE042,1.9,21.78,17.08,0.12
4,-10.0,54.25,-3.986157,7.328668,9.033838,-0.015625,"(54.25, -10.0)",2000-05-05,7.148388,-3.513699,...,-0.015625,8.856992,IE,IE0,IE04,IE042,-0.44,12.57,14.37,5.4


In [37]:
# change date to datetime and get week number
weather_final_01['date'] =  pd.to_datetime(weather_final_01['date'])
weather_final_01['Week_Number'] = weather_final_01['date'].dt.week

  weather_final_01['Week_Number'] = weather_final_01['date'].dt.week


In [38]:
weather_final_01.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 353736 entries, 0 to 353735
Data columns (total 33 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   longitude                  353736 non-null  object        
 1   latitude                   353736 non-null  object        
 2   wind_2AM                   353736 non-null  float32       
 3   dew_point_2AM              353736 non-null  float64       
 4   temperature_2AM            353736 non-null  float64       
 5   uvb_2AM                    353736 non-null  float32       
 6   lat_long                   353736 non-null  object        
 7   date                       353736 non-null  datetime64[ns]
 8   apparent_temperature_2AM   353736 non-null  float64       
 9   wind_11AM                  353736 non-null  float32       
 10  dew_point_11AM             353736 non-null  float64       
 11  temperature_11AM           353736 non-null  float64 

In [39]:
weather_final_01.head()

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,-3.165971,7.574976,10.66015,-0.015625,"(54.25, -10.0)",2000-05-01,8.821107,0.163278,...,8.600872,IE,IE0,IE04,IE042,2.96,23.1,15.57,3.84,18
1,-10.0,54.25,-1.199289,8.215356,9.807397,-0.015625,"(54.25, -10.0)",2000-05-02,8.128182,-1.531899,...,7.141112,IE,IE0,IE04,IE042,3.03,23.49,25.09,5.6,18
2,-10.0,54.25,-4.2879,6.965784,8.517816,-0.015625,"(54.25, -10.0)",2000-05-03,6.556098,-4.149942,...,6.432245,IE,IE0,IE04,IE042,3.64,27.86,23.63,0.11,18
3,-10.0,54.25,-4.348374,6.377405,8.555994,-0.015625,"(54.25, -10.0)",2000-05-04,6.473928,-4.595942,...,7.814194,IE,IE0,IE04,IE042,1.9,21.78,17.08,0.12,18
4,-10.0,54.25,-3.986157,7.328668,9.033838,-0.015625,"(54.25, -10.0)",2000-05-05,7.148388,-3.513699,...,8.856992,IE,IE0,IE04,IE042,-0.44,12.57,14.37,5.4,18


In [40]:
# round data to 2 decimals
weather_final_01 = weather_final_01.round(decimals=2)
weather_final_01

Unnamed: 0,longitude,latitude,wind_2AM,dew_point_2AM,temperature_2AM,uvb_2AM,lat_long,date,apparent_temperature_2AM,wind_11AM,...,apparent_temperature_11PM,country,NUTS1,NUTS2,NUTS3,MRT_2AM,MRT_11AM,MRT_4PM,MRT_11PM,Week_Number
0,-10.0,54.25,-3.17,7.57,10.66,-0.02,"(54.25, -10.0)",2000-05-01,8.82,0.16,...,8.60,IE,IE0,IE04,IE042,2.96,23.10,15.57,3.84,18
1,-10.0,54.25,-1.20,8.22,9.81,-0.02,"(54.25, -10.0)",2000-05-02,8.13,-1.53,...,7.14,IE,IE0,IE04,IE042,3.03,23.49,25.09,5.60,18
2,-10.0,54.25,-4.29,6.97,8.52,-0.02,"(54.25, -10.0)",2000-05-03,6.56,-4.15,...,6.43,IE,IE0,IE04,IE042,3.64,27.86,23.63,0.11,18
3,-10.0,54.25,-4.35,6.38,8.56,-0.02,"(54.25, -10.0)",2000-05-04,6.47,-4.60,...,7.81,IE,IE0,IE04,IE042,1.90,21.78,17.08,0.12,18
4,-10.0,54.25,-3.99,7.33,9.03,-0.02,"(54.25, -10.0)",2000-05-05,7.15,-3.51,...,8.86,IE,IE0,IE04,IE042,-0.44,12.57,14.37,5.40,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353731,29.75,61.75,2.94,2.30,3.38,-0.02,"(61.75, 29.75)",2001-09-26,0.79,2.99,...,2.75,FI,FI1,FI1C,FI1C5,0.88,14.30,2.49,1.24,39
353732,29.75,61.75,2.75,4.69,5.07,-0.02,"(61.75, 29.75)",2001-09-27,2.72,2.01,...,5.84,FI,FI1,FI1C,FI1C5,0.27,10.55,3.56,2.00,39
353733,29.75,61.75,2.24,6.50,7.68,-0.02,"(61.75, 29.75)",2001-09-28,5.62,2.69,...,-1.42,FI,FI1,FI1C,FI1C5,2.56,17.59,4.47,-6.59,39
353734,29.75,61.75,3.64,-0.16,0.94,-0.02,"(61.75, 29.75)",2001-09-29,-1.72,3.54,...,-0.62,FI,FI1,FI1C,FI1C5,-6.83,10.64,-4.16,-7.73,39


In [41]:
# drop possible duplicates
weather_final_01 = weather_final_01.drop_duplicates(keep='first')
weather_final_01.shape

(353736, 33)

No duplicates found.

In [42]:
# write data to csv
weather_final_01.to_csv('FINAL_WEATHER_MERGED_01')