In [1]:
# Import Libraries and Setup Environment
import numpy as np
import pandas as pd

pd.set_option('display.max_columns', None)

# Extract Data from CSV
df = pd.read_csv(filepath_or_buffer='weather_original.csv', sep=',', encoding='utf-8', header=0, usecols=[1,10,11,12,13,14,15], low_memory=False)
df = df[['DATE','TMP','WND','DEW','CIG','VIS','SLP']]
df = df.rename(columns={"TMP":"Temperature", "WND":"Wind", "DEW":"Dew", "CIG":"Sky", "VIS":"Visibility", "SLP":"ATM"})

df["Temperature"]=pd.Series(df.Temperature).replace('\+', '', regex=True).replace(',', '.', regex=True).apply(pd.to_numeric, args=('coerce',))
df["Dew"]=pd.Series(df.Dew).replace('\+', '', regex=True).replace(',', '.', regex=True).apply(pd.to_numeric, args=('coerce',))
df["ATM"]=pd.Series(df.ATM).replace('\+', '', regex=True).replace(',', '.', regex=True).apply(pd.to_numeric, args=('coerce',))

df.head()


Unnamed: 0,DATE,Temperature,Wind,Dew,Sky,Visibility,ATM
0,2019-01-01T00:00:00,89.1,"999,9,C,0000,1",78.1,"99999,9,9,N",006000199,10165.1
1,2019-01-01T00:11:00,89.5,"999,9,C,0000,5",78.5,"00152,5,M,N","004023,5,N,5",99999.9
2,2019-01-01T00:17:00,89.5,"999,9,C,0000,5",78.5,"00122,5,M,N","002414,5,N,5",99999.9
3,2019-01-01T00:22:00,83.5,"999,9,C,0000,5",67.5,"00122,5,M,N","001207,5,N,5",99999.9
4,2019-01-01T00:47:00,80.5,"999,9,C,0000,5",70.5,"00122,5,M,N","001609,5,N,5",99999.9


In [2]:
# Transform Data

# Temperature
df['Temperature'] = df['Temperature'].divide(other=10)

# Wind
data = df['Wind'].str.split(pat=",", expand=True)
df['Wind-Dir'] = data[0].astype('int64')
df['Wind-Rate'] = data[3].astype('float64').divide(other=10)

# Dew
df['Dew'] = df['Dew'].divide(other=10)

# Sky
data = df['Sky'].str.split(pat=",", expand=True)
df['Sky'] = data[0]
df['Sky'] = data[0].astype('int64')

# Visibility
data = df['Visibility'].str.split(pat=",", expand=True)
df['Visibility'] = data[0]
df['Visibility'] = data[0].astype('int64')

# ATM
df['ATM'] = df['ATM'].divide(other=10)

In [3]:
import math

def getSin(wind):
    if wind >= 0 and wind <= 360:
        return math.sin(math.radians(wind))
    return None

def getCos(wind):
    if wind >= 0 and wind <= 360:
        return math.cos(math.radians(wind))
    return None


df.loc[:,"sin_wind"] = df.apply(lambda row: getSin(row["Wind-Dir"]), axis =1)
df.loc[:,"cos_wind"] = df.apply(lambda row: getCos(row["Wind-Dir"]), axis =1)

In [4]:
# Save
df.to_csv('weather.csv', index=False)


In [5]:
df

Unnamed: 0,DATE,Temperature,Wind,Dew,Sky,Visibility,ATM,Wind-Dir,Wind-Rate,sin_wind,cos_wind
0,2019-01-01T00:00:00,8.91,"999,9,C,0000,1",7.81,99999,6000,1016.51,999,0.0,,
1,2019-01-01T00:11:00,8.95,"999,9,C,0000,5",7.85,152,4023,9999.99,999,0.0,,
2,2019-01-01T00:17:00,8.95,"999,9,C,0000,5",7.85,122,2414,9999.99,999,0.0,,
3,2019-01-01T00:22:00,8.35,"999,9,C,0000,5",6.75,122,1207,9999.99,999,0.0,,
4,2019-01-01T00:47:00,8.05,"999,9,C,0000,5",7.05,122,1609,9999.99,999,0.0,,
...,...,...,...,...,...,...,...,...,...,...,...
14444,2019-12-31T20:52:00,11.15,"230,5,N,0057,5",-1.75,22000,16093,1005.05,230,5.7,-0.766044,-0.642788
14445,2019-12-31T21:00:00,11.11,"230,1,N,0057,1",-1.71,22000,16000,1005.01,230,5.7,-0.766044,-0.642788
14446,2019-12-31T21:52:00,11.15,"220,5,N,0046,5",-1.15,2134,16093,1005.25,220,4.6,-0.642788,-0.766044
14447,2019-12-31T22:52:00,10.65,"220,5,N,0031,5",-1.15,2743,16093,1005.45,220,3.1,-0.642788,-0.766044
