# Petrol Price Prediction

Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

Data Collection and Processing

In [2]:
# Read the two raw source tables from the Datasets folder:
# weekly petrol prices and the PKR/USD exchange-rate series.
petrol_price = pd.read_csv(filepath_or_buffer='Datasets/Petrol Price.csv')
usd_pkr = pd.read_csv(filepath_or_buffer='Datasets/PKR _ US$ Exchange Rates.csv')

In [None]:
# Preview the first five rows of the petrol price table
petrol_price.head(n=5)

In [None]:
# Preview the first five rows of the USD - PKR exchange-rate table
usd_pkr.head(n=5)

In [None]:
# Merging datasets with required values
# Parse both date columns to datetime so they can be subtracted from each other.
usd_pkr["Date"] = pd.to_datetime(usd_pkr['Date'])
petrol_price['Weekly'] = pd.to_datetime(petrol_price['Weekly'])

# For every exchange-rate date, take the "Karachi" value from the petrol row
# whose "Weekly" date is nearest in time (idxmin returns the first row on ties).
usd_pkr["Petrol_Price"] = usd_pkr["Date"].apply(
    lambda date: petrol_price.loc[petrol_price["Weekly"].sub(date).abs().idxmin(), "Karachi"]
)

# Overwrite the first three rows (by position) with a fixed price.
# NOTE(review): why 273.95 is used is not visible in this notebook - confirm the source.
# Written as a single .iloc assignment: the chained form `df[col][:3] = value`
# triggers SettingWithCopyWarning and does not modify the frame under
# pandas Copy-on-Write.
usd_pkr.iloc[:3, usd_pkr.columns.get_loc('Petrol_Price')] = 273.95

# Independent copy, so later changes to usd_pkr do not leak into the modelling frame.
petrol_price_pred = usd_pkr.copy()

In [None]:
# Preview the first five rows of the merged data frame
petrol_price_pred.head(n=5)

In [None]:
# Checking the info
# Prints the DataFrame.info() summary of the merged frame (columns, non-null counts, dtypes).
petrol_price_pred.info()

In [None]:
# Count the missing entries in every column of the merged frame
petrol_price_pred.isna().sum()

In [None]:
# Checking the statistics
# Shows the DataFrame.describe() summary statistics for the merged frame.
petrol_price_pred.describe()

Correlation

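1.   Positive Correlation: both variables tend to move in the same direction (coefficient close to +1)
2.   Negative Correlation: the variables tend to move in opposite directions (coefficient close to -1)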



In [None]:
# Finding the correlation
# Restrict to numeric columns explicitly: the frame also holds the datetime
# "Date" column, and since pandas 2.0 corr() no longer drops non-numeric
# columns by default, which can raise or distort the matrix.
correlation = petrol_price_pred.corr(numeric_only=True)

# constructing heatmap
# One annotated cell per pair of columns; values are shown to one decimal place.
plt.figure(figsize=(8, 8))
sns.heatmap(correlation, cbar=True, square=True, fmt='.1f', annot=True, annot_kws={'size': 8}, cmap='Reds')

In [None]:
# Show how each column of the correlation matrix relates to the petrol price
print(correlation.loc[:, 'Petrol_Price'])

In [None]:
# Checking the distribution
# sns.distplot is deprecated; histplot with a KDE overlay on a density scale
# is the replacement seaborn documents for it.
sns.histplot(
    petrol_price_pred['Petrol_Price'],
    kde=True,
    stat='density',
    kde_kws={'cut': 3},
    color='Red',
)

Splitting the Features and Targets

In [None]:
# Target: the petrol price column.
Y = petrol_price_pred['Petrol_Price']
# Features: every remaining column except the date and the target itself.
X = petrol_price_pred.drop(columns=['Date', 'Petrol_Price'])

print(X, Y)

Splitting into training and test data

In [None]:
# Training Data
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, and these rows are dated
# observations - confirm a random (non-chronological) split is intended.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

# Model Training
# random_state pins the forest's bootstrap/feature sampling so that
# Restart & Run All reproduces the same model and the same scores.
regressor = RandomForestRegressor(n_estimators=100, random_state=2)
regressor.fit(X_train, Y_train)

Model Evaluation

In [None]:
# Run the fitted model on the held-out feature rows and show the predictions
test_data_pred = regressor.predict(X=X_test)
test_data_pred

In [None]:
# R-squared score
# R^2 (coefficient of determination) is a goodness-of-fit score, not an error:
# higher is better, with 1.0 meaning a perfect fit. The variable name is kept
# as-is so any later cell that reads `error_score` keeps working.
error_score = metrics.r2_score(Y_test, test_data_pred)
print('R-squared score: ', error_score)

Comparing the actual values with predicted values in a Plot

In [None]:
# Positional copy of the actual values, so both lines are drawn against
# 0..n-1 instead of the row labels carried over from the split. A new name
# is used so Y_test is not rebound and earlier cells can be re-run.
actual_values = np.asarray(Y_test)

# Time to plot
fig, ax = plt.subplots()
ax.plot(actual_values, color='Blue', label='Actual Value')
ax.plot(test_data_pred, color='Green', label='Predicted Value')
ax.set_title('Actual Price VS Predicted Price')
ax.set_xlabel('No. of values')
ax.set_ylabel('Petrol Price')
ax.legend()
plt.show()