In [None]:
# Data Manipulation Tools
import pandas as pd
import numpy as np

# Data Visualization Tools
import matplotlib.pyplot as plt
import seaborn as sns

# Data Preprocessing Tools
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Load the price list; the path is relative to the notebook's working directory.
priceData = pd.read_csv("./data/processed/pricesList.csv")
# Distinct values of the "Name" column, in order of first appearance.
uniqueNames = priceData['Name'].unique()
def extractDate(data):
    """Return a copy of ``data`` indexed by a ``Datetime`` built from Year/Month/Week.

    The day of month is the first day of the 7-day bucket that ``Week``
    points at: week 1 -> 1, week 2 -> 8, ... week 5 -> 29.

    Week 5 of a non-leap February would map to the non-existent 29th and make
    ``pd.to_datetime`` raise, so for week 5 only the day is clamped to the
    last day of the month. Any other out-of-range week still raises, exactly
    as before, so bad input is not silently hidden.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Year``, ``Month``, ``Week`` and ``Season``.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` indexed by ``Datetime`` with ``Year``, ``Month``,
        ``Week`` and ``Season`` removed. Row order is unchanged.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If a row cannot be assembled into a valid calendar date.
    """
    frame = data.copy()
    week = frame["Week"]

    # First day of the week-of-month bucket.
    day = (week - 1) * 7 + 1

    # Length of each row's month, used to detect a week-5 start that falls
    # past the end of the month (only possible for February).
    monthLength = pd.to_datetime(frame[["Year", "Month"]].assign(day=1)).dt.days_in_month
    overflow = (week == 5) & (day > monthLength)
    day = day.mask(overflow, monthLength)

    frame["Datetime"] = pd.to_datetime(frame[["Year", "Month"]].assign(day=day))
    frame = frame.set_index("Datetime")
    return frame.drop(columns=["Year", "Month", "Week", "Season"])
# Index the prices by date, then narrow the frame to a single item: the first
# entry of uniqueNames. "Name" is constant after that filter, so it is dropped.
priceData = (
    extractDate(priceData)
    .loc[lambda frame: frame["Name"] == uniqueNames[0]]
    .drop(columns=["Name"])
)

In [None]:
# Load the rainfall table; the path is relative to the notebook's working directory.
rainfallData = pd.read_csv("./data/processed/rainfallData.csv")
def extractDate2(data):
    """Return a copy of ``data`` indexed by the first day of each (year, month).

    The ``year`` and ``month`` columns are combined into a ``Datetime`` index
    (day fixed to 1), the rows are sorted by that index, and the two source
    columns are removed. The input frame is left untouched.
    """
    monthStart = pd.to_datetime(data[["year", "month"]].assign(day=1))
    return (
        data.assign(Datetime=monthStart)
        .set_index("Datetime")
        .sort_index()
        .drop(columns=["year", "month"])
    )
# Replace the raw table with its date-indexed, chronologically sorted version.
rainfallData = extractDate2(rainfallData)

In [None]:
# Load the fuel price table; the path is relative to the notebook's working directory.
fuelData = pd.read_csv("./data/processed/ceypetcoPrices.csv")
def extractDate3(data):
    """Return a copy of ``data`` indexed by its parsed ``Date`` column.

    Rows are kept only when ``Date`` is present and does not contain the text
    "PM"; the remaining values are parsed as ``day.month.year``.
    NOTE(review): the "PM" rows presumably carry a time of day that the date
    format cannot parse - confirm that discarding them is intended.

    Missing ``Date`` values used to make the ``~str.contains(...)`` mask fail
    with a TypeError; they are now excluded explicitly. The filtered rows are
    copied before the new column is added, so no chained-assignment warning
    is triggered.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string ``Date`` column. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Filtered copy indexed by ``Datetime``, sorted by that index, with the
        ``Date`` column removed.

    Raises
    ------
    ValueError
        If a kept ``Date`` value does not match ``%d.%m.%Y``.
    """
    dates = data["Date"]
    # Literal (non-regex) match; na=False keeps the mask strictly boolean.
    keep = dates.notna() & ~dates.str.contains("PM", regex=False, na=False)
    frame = data.loc[keep].copy()
    frame["Datetime"] = pd.to_datetime(frame["Date"], format="%d.%m.%Y")
    return frame.set_index("Datetime").sort_index().drop(columns=["Date"])
# Replace the raw table with its filtered, date-indexed, sorted version.
fuelData = extractDate3(fuelData)

In [None]:
# Load the dollar rate table; the path is relative to the notebook's working directory.
dollarData = pd.read_csv("./data/processed/dollarRate.csv")
def extractDate4(data):
    """Return a copy of ``data`` indexed by its parsed ``Date`` column.

    ``Date`` is handed to ``pd.to_datetime`` without an explicit format, the
    result becomes the ``Datetime`` index, and ``Date`` itself is removed.
    Row order is preserved (no sorting) and the input frame is not modified.

    NOTE(review): with no format given, day/month order is inferred by pandas;
    confirm the file uses an unambiguous date layout.
    """
    timestamps = pd.to_datetime(data["Date"])
    return (
        data.assign(Datetime=timestamps)
        .set_index("Datetime")
        .drop(columns=["Date"])
    )
# Replace the raw table with its date-indexed version (row order is kept as loaded).
dollarData = extractDate4(dollarData)

In [None]:
def imputeData(data, column):
    """Return a copy of ``data`` with gaps in ``column`` filled by the column mean.

    Missing values are replaced with the mean of the non-missing values, and
    the whole column (not just the filled cells) is then rounded to two
    decimals.

    The work is done on a copy, so the caller's frame is left untouched, in
    line with the ``extractDate*`` helpers in this notebook. A column with no
    valid values at all is returned unchanged (still all-NaN) instead of
    failing.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of a numeric column to impute.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with ``column`` imputed and rounded.
    """
    filled = data.copy()
    values = filled[column]
    # Series.mean() skips NaN, i.e. it is the mean of the observed values.
    filled[column] = values.fillna(values.mean()).round(2)
    return filled
# Fill missing "Price" values in both tables with the respective column mean.
# NOTE(review): fuelData still holds every fuel type here (the fuel plot later
# filters on its "Name" column), so its fill value is a mean across all fuel
# types - confirm that is intended.
priceData = imputeData(data=priceData, column="Price")
fuelData = imputeData(data=fuelData, column="Price")


In [None]:
# Overlay the rainfall columns of three stations on the vegetable price series.
fix, ax = plt.subplots(figsize=(15, 5))
priceData["Price"].plot(ax=ax, color="red")
# Stations are drawn in the same order as the legend entries below.
for station, colour in (
    ("anuradhapura", "green"),
    ("jaffna", "orange"),
    ("nuwaraeliya", "blue"),
):
    rainfallData[station].plot(ax=ax, alpha=0.7, color=colour)
ax.set_title("Rainfall Data to Vegetable Price")
ax.legend(["Vegetable Price", "Anuradhapura", "Jaffna", "Nuwara Eliya"])
plt.show()

In [None]:
# Overlay the dollar buy/sell rates on the vegetable price series.
# Both rates are drawn at twice their value (presumably so they sit in a range
# comparable to the price - confirm); the legend states the factor so the
# shared y-axis is not read as the raw exchange rate.
fix, ax = plt.subplots(figsize=(15, 5))
priceData["Price"].plot(ax=ax, color="red", label="Vegetable Price")
(dollarData["Buy Rate"] * 2).plot(ax=ax, alpha=0.5, label="Buy Rate (x2)")
(dollarData["Sell Rate"] * 2).plot(ax=ax, alpha=0.5, label="Sell Rate (x2)")
ax.set_title("Dollar Rate to Vegetable Price")
# Labels are attached to each series above, so they cannot drift out of order.
ax.legend()
# Render explicitly, as the rainfall plot does, instead of echoing the Legend repr.
plt.show()

In [None]:
# Overlay two fuel price series (from 2017-01-01 onwards) on the vegetable price.
# Both fuel series are drawn at twice their value (presumably so they sit in a
# range comparable to the price - confirm); the legend states the factor so
# the shared y-axis is not read as the raw fuel price.
# NOTE(review): the fuels are picked by position (5 and 4) in
# fuelData["Name"].unique(), which depends on row order; verify that these
# positions really are Lanka Auto Diesel and Lanka Petrol 92.
fix, ax = plt.subplots(figsize=(15, 5))
priceData["Price"].plot(ax=ax, color="red", label="Vegetable Price")
(fuelData.loc[fuelData["Name"] == fuelData["Name"].unique()[5], "Price"].loc["2017-01-01":] * 2).plot(
    ax=ax, alpha=0.7, label="LAD Price (Lanka Auto Diesel) (x2)"
)
(fuelData.loc[fuelData["Name"] == fuelData["Name"].unique()[4], "Price"].loc["2017-01-01":] * 2).plot(
    ax=ax, alpha=0.7, label="LP92 Price (Lanka Petrol 92) (x2)"
)
ax.set_title("Fuel Price to Vegetable Price")
# Labels are attached to each series above, so they cannot drift out of order.
ax.legend()
# Render explicitly, as the rainfall plot does, instead of echoing the Legend repr.
plt.show()

In [None]:
# Show the distinct fuel names; the fuel plot selects its series by position in this array.
fuelData["Name"].unique()