**IMPORTING PACKAGES**

In [2]:
!pip install xgboost

ERROR: Could not find a version that satisfies the requirement xgboost (from versions: none)
ERROR: No matching distribution found for xgboost


In [1]:
import yfinance as yf

import sys
import pandas as pd
import math
import numpy as np

import matplotlib
from matplotlib import pyplot as plt
from matplotlib import style
import seaborn as sns

import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
import xgboost
from xgboost import XGBRegressor

from datetime import datetime
from dateutil.relativedelta import relativedelta

ModuleNotFoundError: No module named 'xgboost'

In [None]:
### Download the required data: coffee, wheat, cotton, maize (corn) and soybean prices
commodities_list = ['coffee', 'wheat', 'cotton', 'corn', 'soybeans']

## Base GitHub URL of the price files; all prices were downloaded from https://www.indexmundi.com/commodities/
url = 'https://raw.githubusercontent.com/drakub/wqu/main/'

## Read one CSV per commodity; the file order matches the names unpacked below
csv_names = ('coffee.csv', 'cotton.csv', 'corn.csv', 'wheat.csv', 'soybeans.csv')
coffee, cotton, maize, wheat, soybean = [pd.read_csv(url + csv_name) for csv_name in csv_names]

In [None]:
## Drop the 'Change' column from every commodity table (in place).
## errors='ignore' keeps this cell re-runnable: running it again on frames that
## have already lost the column is a no-op instead of a KeyError.
for frame in (coffee, cotton, maize, wheat, soybean):
    frame.drop(columns='Change', inplace=True, errors='ignore')

In [None]:
## Rename each table's 'Price' column after the commodity it holds
price_labels = (
    (coffee, 'coffee'),
    (cotton, 'cotton'),
    (maize, 'maize'),
    (wheat, 'wheat'),
    (soybean, 'soybean'),
)
for frame, label in price_labels:
    frame.rename(columns={'Price': label}, inplace=True)

In [None]:
## Merge the commodity tables into one dataframe (inner join on 'Month').
## The key is named explicitly so the merge cannot silently switch to joining on
## every shared column if the tables ever have another column name in common.
prices = (
    coffee
    .merge(cotton, on='Month')
    .merge(maize, on='Month')
    .merge(wheat, on='Month')
    .merge(soybean, on='Month')
)

In [None]:
# Preview the first rows of the merged price table
prices.head()

In [None]:
# Preview the last rows of the merged price table
prices.tail()

In [None]:
## Parse the month labels into dates, pinned to day 01 of each month
from dateutil.parser import parse


def _first_of_month(label):
    """Return the date obtained by parsing '01 ' followed by the month label."""
    return parse('01 ' + label)


prices['Month'] = prices['Month'].map(_first_of_month)

In [None]:
# Check the first rows again now that 'Month' holds parsed dates
prices.head()

In [None]:
## Rename the 'Month' column to 'Date'
prices = prices.rename(columns={'Month': 'Date'})

In [None]:
# Check the first rows after the 'Month' column was renamed to 'Date'
prices.head()

**Data Analysis and Visualization**

In [None]:
# Column dtypes and non-null counts of the price table
prices.info()

In [None]:
# Summary statistics of the price table
prices.describe()

In [None]:
## Make sure 'Date' is a datetime column, then promote it to the index
prices = prices.assign(Date=pd.to_datetime(prices['Date'])).set_index('Date')

In [None]:
## Plot the monthly price of every commodity
# Every column whose dtype is not object is treated as a plottable commodity
commodities = [name for name, dtype in prices.dtypes.items() if dtype != 'O']

style.use('ggplot')
prices.plot(y=commodities)
plt.ylabel('Monthly Prices Per Metric Ton\n(US Dollars)')
plt.xlabel('Months /Years')
plt.show()

In [None]:
## Plot min-max scaled prices
# Rescale each column as (value - column min) / (column max - column min)
column_min = prices.min()
column_span = prices.max() - column_min
min_max_scaled_prices = (prices - column_min) / column_span

min_max_scaled_prices.plot(y=commodities)
plt.ylabel('Scaled Monthly Prices Per Metric Ton \n(US Dollars)')
plt.xlabel('Months /Years')
plt.show()

In [None]:
## Annotated heatmap of the pairwise correlations between the price columns
corMatrix = prices.corr()

fig = plt.figure(figsize=(7, 5))
sns.heatmap(data=corMatrix, annot=True)
plt.show()

In [None]:
## Distribution of each commodity price: density histogram with a KDE overlay
plt.figure(figsize=(20, 60))
for plotnumber, feature in enumerate(commodities, start=1):
    # One panel per commodity on a 12 x 3 grid
    plt.subplot(12, 3, plotnumber)
    sns.histplot(prices[feature], color="red", kde=True, stat="density", linewidth=0)
plt.show()

In [None]:
## Box plot of each commodity price
plt.figure(figsize=(20, 60))
for plotnumber, feature in enumerate(commodities, start=1):
    # One panel per commodity on a 12 x 3 grid, labelled with the commodity name
    plt.subplot(12, 3, plotnumber)
    sns.boxplot(x=prices[feature])
    plt.xlabel(feature)
plt.show()

**TRAINING ALGORITHM AND PREDICTING PRICES**

In [None]:
def findModelUsingGridSearchCv(x, y, random_state=0):
    """Grid-search several regressors on (x, y) and tabulate the best result of each.

    A decision tree, a random forest, an XGBoost regressor and a linear
    regression are each tuned with GridSearchCV over a small parameter grid,
    using 5 shuffled 80/20 splits for cross-validation.

    Parameters
    ----------
    x : feature matrix passed to ``GridSearchCV.fit``.
    y : target values passed to ``GridSearchCV.fit``.
    random_state : int, default 0
        Seed used for the cross-validation splitter and for every estimator
        that accepts one, so that repeated runs report the same scores and
        parameters. The default keeps the splitter seed the function always used.

    Returns
    -------
    pandas.DataFrame
        One row per algorithm with columns ``model``, ``best_score`` (the
        mean cross-validated score of the best parameter set, using each
        estimator's default scorer) and ``best_params``.
    """
    algos = {
        'DecisionTreeRegressor': {
            # Seeded: splitter='random' is otherwise different on every run.
            'model': DecisionTreeRegressor(random_state=random_state),
            'params': {
                'criterion': ['squared_error', 'friedman_mse'],
                'splitter': ['best', 'random']
            }
        },
        'RandomForestRegressor': {
            # Seeded: bootstrap sampling is otherwise different on every run.
            'model': RandomForestRegressor(random_state=random_state),
            'params': {
                'n_estimators': [10, 50, 100, 130],
                'criterion': ['squared_error'],
                'max_depth': [2, 3],
            }
        },
        'XGBRegressor': {
            'model': XGBRegressor(random_state=random_state),
            'params': {
                'objective': ['reg:squarederror'],
                'learning_rate': [0.5, 0.1, 0.01, 0.001],
                'max_depth': [2, 3],
                'n_estimators': [10, 50, 100, 200]
            }
        },
        'LinearRegression': {
            'model': LinearRegression(),
            'params': {}
        }
    }

    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=random_state)

    scores = []
    for algo_name, config in algos.items():
        gs = GridSearchCV(config['model'], config['params'], cv=cv, return_train_score=False)
        gs.fit(x, y)
        scores.append({
            'model': algo_name,
            'best_score': gs.best_score_,
            'best_params': gs.best_params_
        })
    return pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])

In [None]:
def plotCorrGraph(commodity):
    """Scatter every commodity price against the price of `commodity`.

    Draws one panel per entry of the notebook-level `commodities` list, with
    that entry's prices on the x axis and `prices[commodity]` on the y axis,
    laid out on a 12 x 3 subplot grid, then shows the figure.
    """
    plt.figure(figsize=(20, 60))
    for panel_index, other in enumerate(commodities, start=1):
        plt.subplot(12, 3, panel_index)
        plt.scatter(prices[other], prices[commodity])
        plt.ylabel(commodity)
        plt.xlabel(other)
    plt.show()

In [None]:
def spitDataSet(target, data=None):
    """Separate the target column from the remaining feature columns.

    Parameters
    ----------
    target : label of the column to predict.
    data : pandas.DataFrame, optional
        Table to split. When omitted, the notebook-level `prices` frame is
        used, which is what the function always did.

    Returns
    -------
    (X, y) : `X` is `data` without the target column, `y` is the target column.
        `data` itself is not modified.
    """
    if data is None:
        data = prices  # fall back to the notebook-level price table
    y = data[target]
    X = data.drop(columns=[target])
    # The previous version also ran train_test_split here and threw the result
    # away; that dead work is removed. Only the full X and y were ever returned.
    return X, y

In [None]:
def plotOriginalPredictedPrices(predicted, original, target):
    """Overlay predicted and original prices on one line chart.

    `predicted` is drawn in red and `original` (converted to a list) in green;
    `target` is used as the chart title. The figure is shown immediately.
    """
    series_specs = (
        (predicted, "red", "Predicted Price"),
        (list(original), "green", "Original Price"),
    )
    for values, colour, legend_label in series_specs:
        plt.plot(values, color=colour, label=legend_label)

    plt.title(target)
    plt.ylabel('Price')
    plt.xlabel('No. of values')
    plt.legend()
    plt.show()

In [None]:
## Print the coffee column of the correlation matrix
coffee_correlations = corMatrix['coffee']
print(coffee_correlations)

## Scatter coffee prices against every commodity price
plotCorrGraph(commodity='coffee')

In [None]:
## Build the feature table and target series for coffee price prediction
split = spitDataSet('coffee')
X, y = split

In [None]:
## Allow up to 100 characters per cell so long best_params entries show more of their content
pd.options.display.max_colwidth = 100

## Compare the candidate models on the coffee data
findModelUsingGridSearchCv(x=X, y=y)