<a href="https://colab.research.google.com/github/chrisyan04/genesis/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Use seaborn for pairplot.
!pip install -q seaborn

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Make NumPy printouts easier to read: 3 digits of precision and no
# scientific notation for small values.
np.set_printoptions(precision=3, suppress=True)

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

# Print the installed TensorFlow version.
print(tf.__version__)

In [None]:
# Load the price table; the first CSV column is used as the row index.
df = pd.read_csv("food_prices.csv", index_col=0)
# Discard rows with any missing field, then parse the date strings.
df = df.dropna()
df['date'] = pd.to_datetime(df['date'])

# Remove non-food rows and rows priced in units that have no entry in the
# conversion table. A boolean mask is used instead of drop-by-index-label so
# that exactly the matching rows are removed even if index labels repeat.
# .copy() gives an independent frame so later column assignments are safe.
excluded_units = ['Marmite', 'Bunch', 'Head']
keep_rows = (df['category'] != 'non-food') & ~df['unit'].isin(excluded_units)
df = df.loc[keep_rows].copy()

# Multiplicative factors that turn "price per listed unit" into "price per
# base unit" (1 KG, 1 L or 1 piece). Each factor is 1 / (quantity of base units
# contained in the listed unit), e.g. '500 G' -> 1 / 0.5 = 2.
unit_conversion = {
    # Already expressed per base unit.
    'KG': 1,
    'L': 1,
    'Unit': 1,
    'Bundle': 1,
    'Loaf': 1,
    # Counted items -> price per single piece.
    'Dozen': 1 / 12,
    '10 pcs': 1 / 10,
    '30 pcs': 1 / 30,
    # Imperial units -> price per KG / per L (1 lb = 0.453592 KG, 1 gal = 3.78541 L).
    'Pound': 1 / 0.453592,
    'Gallon': 1 / 3.78541,
    # Metric weights -> price per KG.
    '20 G': 50,
    '200 G': 5,
    '250 G': 4,
    '300 G': 10 / 3,
    '400 G': 2.5,
    '500 G': 2,
    '0.5 KG': 2,
    '1.1 KG': 1 / 1.1,
    '1.2 KG': 1 / 1.2,
    '1.3 KG': 1 / 1.3,
    '1.4 KG': 1 / 1.4,
    '1.5 KG': 1 / 1.5,
    '1.6 KG': 1 / 1.6,
    '2.1 KG': 1 / 2.1,
    '2.5 KG': 0.4,
    '3.1 KG': 1 / 3.1,
    '3.4 KG': 1 / 3.4,
    '3.5 KG': 1 / 3.5,
    '10 KG': 0.1,
    '100 KG': 0.01,
    # Metric volumes -> price per L.
    '200 ML': 5,
    '500 ML': 2,
    '750 ML': 1 / 0.75,
    '100 L': 0.01,
}


# Look up each row's factor and rescale its price to the base unit.
# Units missing from `unit_conversion` map to NaN, so their price becomes NaN.
conversion_factor = df['unit'].map(unit_conversion)
df['usdprice'] = df['usdprice'] * conversion_factor
df

Unnamed: 0_level_0,date,category,commodity,unit,usdprice,country
del,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
147,2004-05-15,cereals and tubers,Rice (coarse),KG,0.2335,Bangladesh
150,2004-06-15,cereals and tubers,Rice (coarse),KG,0.2393,Bangladesh
159,2004-09-15,cereals and tubers,Rice (coarse),KG,0.2430,Bangladesh
168,2004-12-15,cereals and tubers,Rice (coarse),KG,0.2981,Bangladesh
174,2005-02-15,cereals and tubers,Rice (coarse),KG,0.2850,Bangladesh
...,...,...,...,...,...,...
32409,2023-12-15,cereals and tubers,Wheat,KG,4.0024,Yemen
32410,2023-12-15,cereals and tubers,Wheat flour,KG,4.0024,Yemen
32411,2023-12-15,miscellaneous food,Salt,KG,1.2007,Yemen
32418,2023-12-15,oil and fats,Oil (vegetable),L,7.6046,Yemen


In [None]:
# Keep only the columns used from here on. errors='ignore' lets this cell be
# re-run after the columns are already gone instead of raising KeyError.
df = df.drop(columns=['category', 'unit'], errors='ignore')
df['commodity'] = df['commodity'].astype('string')
df['country'] = df['country'].astype('string')
print(df['country'].unique())


def _country_subset(country):
    """Return the rows of ``df`` for one country and that country's commodities.

    Parameters
    ----------
    country : str
        Value to match exactly against the ``country`` column.

    Returns
    -------
    tuple
        ``(rows, foods)`` where ``rows`` is the filtered frame and ``foods`` is
        the list of distinct ``commodity`` values found in ``rows`` only.
    """
    rows = df[df['country'] == country]
    return rows, rows['commodity'].unique().tolist()


# Each *_foods list is taken from that country's own rows, not from the full
# table, so it only names commodities actually recorded for the country.
bangladesh_df, bangladesh_foods = _country_subset("Bangladesh")
# "Ethipoia" (sic) is the spelling that appears in the country column.
ethiopia_df, ethiopia_foods = _country_subset("Ethipoia")
ukraine_df, ukraine_foods = _country_subset("Ukraine")
yemen_df, yemen_foods = _country_subset("Yemen")
somalia_df, somalia_foods = _country_subset("Somalia")
nigeria_df, nigeria_foods = _country_subset("Nigeria")
myanmar_df, myanmar_foods = _country_subset("Myanmar")
haiti_df, haiti_foods = _country_subset("Haiti")
pakistan_df, pakistan_foods = _country_subset("Pakistan")
india_df, india_foods = _country_subset("India")
uganda_df, uganda_foods = _country_subset("Uganda")
rwanda_df, rwanda_foods = _country_subset("Rwanda")
kenya_df, kenya_foods = _country_subset("Kenya")
senegal_df, senegal_foods = _country_subset("Senegal")
indonesia_df, indonesia_foods = _country_subset("Indonesia")


<StringArray>
[ 'Bangladesh',    'Ethipoia',       'Haiti',   'Indonesia',       'India',
       'Kenya',     'Myanmar',     'Nigeria',    'Pakistan', 'Philippines',
      'Rwanda',     'Senegal',     'Somalia', 'South Sudan',      'Uganda',
     'Ukraine',       'Yemen']
Length: 17, dtype: string


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional

# Order the Bangladesh rows chronologically.
bangladesh_df = bangladesh_df.sort_values('date')

# Min-max scale the USD prices into [0, 1]; `scaler` is kept so predictions
# can be mapped back to USD afterwards.
scaler = MinMaxScaler(feature_range=(0, 1))
bangladesh_price_column = bangladesh_df['usdprice'].values.reshape(-1, 1)
bangladesh_df['usdprice_scaled'] = scaler.fit_transform(bangladesh_price_column)

def create_sequences(data, seq_length):
    """Slice a 1-D series into sliding windows and their next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``; there are ``len(data) - seq_length`` such pairs.

    Returns a tuple ``(X, y)`` of NumPy arrays. When ``data`` has no more than
    ``seq_length`` elements both arrays are empty.
    """
    starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)

seq_length = 10

# Build the training windows one commodity at a time. The frame is sorted by
# date only, so consecutive rows belong to different commodities; windowing the
# whole column would train on mixed-commodity sequences, while the prediction
# loop below feeds the model the last `seq_length` prices of a single commodity.
window_parts, target_parts = [], []
for _, commodity_rows in bangladesh_df.groupby('commodity', sort=False):
    X_part, y_part = create_sequences(commodity_rows['usdprice_scaled'].values, seq_length)
    if len(X_part):  # commodities with <= seq_length rows yield no windows
        window_parts.append(X_part)
        target_parts.append(y_part)

if not window_parts:
    raise ValueError("No commodity has more than seq_length observations to train on")

X = np.concatenate(window_parts)
y = np.concatenate(target_parts)

# Add the trailing feature axis: (samples, seq_length, 1).
X = X.reshape(X.shape[0], seq_length, 1)

# NOTE(review): the held-out split is created but never evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Two stacked bidirectional LSTM layers followed by a single regression output.
model = Sequential([
    Bidirectional(LSTM(50, return_sequences=True), input_shape=(seq_length, 1)),
    Bidirectional(LSTM(50, return_sequences=False)),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# NOTE(review): names and printed labels say 2025 but the dates are in 2023 — confirm intent.
future_dates_2025 = pd.date_range(start='2023-11-15', end='2023-11-16')

bangladesh_foods = bangladesh_df['commodity'].unique()

# {commodity: {date: predicted USD price, or None when history is too short}}
future_predictions_2025 = {}

for food in bangladesh_foods:
    future_predictions_2025[food] = {}
    food_rows = bangladesh_df[bangladesh_df['commodity'] == food]
    for future_date in future_dates_2025:
        # Only observations dated on or before the target date are used.
        historical_data = food_rows[food_rows['date'] <= future_date]

        if len(historical_data) >= seq_length:
            input_sequence = historical_data.tail(seq_length)['usdprice_scaled'].values
            input_sequence = input_sequence.reshape(1, seq_length, 1)

            future_price_scaled = model.predict(input_sequence)[0][0]
            # Undo the min-max scaling so the stored value is a USD price.
            future_price = scaler.inverse_transform([[future_price_scaled]])[0][0]

            future_predictions_2025[food][future_date] = future_price
        else:
            future_predictions_2025[food][future_date] = None

for food, predictions in future_predictions_2025.items():
    print(f"Predictions for {food} in 2025:")
    for future_date, future_price in predictions.items():
        if future_price is not None:
            print(f"{future_date}: {future_price}")
        else:
            print(f"{future_date}: Not enough historical data for prediction")


model.save("bangladesh_price_prediction_model.keras")



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Predictions for Rice (coarse) in 2025:
2023-11-15 00:00:00: 0.6141300765275955
2023-11-16 00:00:00: 0.6141300765275955
Predictions for Oil (palm) in 2025:
2023-11-15 00:00:00: 0.897356217712164
2023-11-16 00:00:00: 0.897356217712164
Predictions for Wheat flour in 2025:
2023-11-15 00:00:00: 0.6361981358230114
2023-11-16 00:00:00: 0.6361981358230114
Predictions for Lentils (masur) in 2025:
2023-11-15 00:00:00: 0.837330354064703
2023-11-16 00:00:00: 0.837330354064703
Predictions for Rice (medium grain) in 2025:
2023-11-15 00:00:00: 0.6429302573800088
2023-11-16 00:00:00: 0.6429302573800088
Predictions for Rice (coarse, BR-8/ 11/, Guti Sharna) in 2025:
2023-11-15 00:00:00: 0.6324088032394648
2023-11-16 00:00:00: 0.6324088032394648
Predictions for Wheat in 2025:
2023-11-15 00:00:00: 0.5955683714002371
2023-11-16 00:00:00: 0.5955683714002371
Predictions for Rice (coarse, Guti Sharna

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional

# Order the Ethiopia rows chronologically.
ethiopia_df = ethiopia_df.sort_values('date')

# Min-max scale the USD prices into [0, 1]; `scaler` is kept so values can be
# mapped back to USD afterwards.
scaler = MinMaxScaler(feature_range=(0, 1))
ethiopia_price_column = ethiopia_df['usdprice'].values.reshape(-1, 1)
ethiopia_df['usdprice_scaled'] = scaler.fit_transform(ethiopia_price_column)

def create_sequences(data, seq_length):
    """Slice a 1-D series into sliding windows and their next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``; there are ``len(data) - seq_length`` such pairs.

    Returns a tuple ``(X, y)`` of NumPy arrays. When ``data`` has no more than
    ``seq_length`` elements both arrays are empty.
    """
    starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)

seq_length = 10

# Build the training windows one commodity at a time. The frame is sorted by
# date only, so consecutive rows belong to different commodities; windowing the
# whole column would train on mixed-commodity sequences, while the prediction
# loop below feeds the model the last `seq_length` prices of a single commodity.
window_parts, target_parts = [], []
for _, commodity_rows in ethiopia_df.groupby('commodity', sort=False):
    X_part, y_part = create_sequences(commodity_rows['usdprice_scaled'].values, seq_length)
    if len(X_part):  # commodities with <= seq_length rows yield no windows
        window_parts.append(X_part)
        target_parts.append(y_part)

if not window_parts:
    raise ValueError("No commodity has more than seq_length observations to train on")

X = np.concatenate(window_parts)
y = np.concatenate(target_parts)

# Add the trailing feature axis: (samples, seq_length, 1).
X = X.reshape(X.shape[0], seq_length, 1)

# NOTE(review): the held-out split is created but never evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Two stacked bidirectional LSTM layers followed by a single regression output.
model = Sequential([
    Bidirectional(LSTM(50, return_sequences=True), input_shape=(seq_length, 1)),
    Bidirectional(LSTM(50, return_sequences=False)),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# NOTE(review): names and printed labels say 2025 but the dates are in 2023 — confirm intent.
future_dates_2025 = pd.date_range(start='2023-11-15', end='2023-11-16')

ethiopia_foods = ethiopia_df['commodity'].unique()

# {commodity: {date: predicted USD price, or None when history is too short}}
future_predictions_2025 = {}

for food in ethiopia_foods:
    future_predictions_2025[food] = {}
    food_rows = ethiopia_df[ethiopia_df['commodity'] == food]
    for future_date in future_dates_2025:
        # Only observations dated on or before the target date are used.
        historical_data = food_rows[food_rows['date'] <= future_date]

        if len(historical_data) >= seq_length:
            input_sequence = historical_data.tail(seq_length)['usdprice_scaled'].values
            input_sequence = input_sequence.reshape(1, seq_length, 1)

            future_price_scaled = model.predict(input_sequence)[0][0]
            # Undo the min-max scaling so the stored value is a USD price rather
            # than a number in the scaler's [0, 1] range.
            future_price = scaler.inverse_transform([[future_price_scaled]])[0][0]

            future_predictions_2025[food][future_date] = future_price
        else:
            future_predictions_2025[food][future_date] = None

for food, predictions in future_predictions_2025.items():
    print(f"Predictions for {food} in 2025:")
    for future_date, future_price in predictions.items():
        if future_price is not None:
            print(f"{future_date}: {future_price}")
        else:
            print(f"{future_date}: Not enough historical data for prediction")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Predictions for Maize (white) in 2025:
2023-11-15 00:00:00: 0.0037406827323138714
2023-11-16 00:00:00: 0.0037406827323138714
Predictions for Wheat in 2025:
2023-11-15 00:00:00: 0.004054323770105839
2023-11-16 00:00:00: 0.004054323770105839
Predictions for Sorghum in 2025:
2023-11-15 00:00:00: 0.003424010705202818
2023-11-16 00:00:00: 0.003424010705202818
Predictions for Beans (fava, dry) in 2025:
2023-11-15 00:00:00: 0.003551127854734659
2023-11-16 00:00:00: 0.003551127854734659
Predictions for Lentils in 2025:
2023-11-15 00:00:00: 0.005336347501724958
2023-11-16 00:00:00: 0.005336347501724958
Predictions for Chickpeas in 2025:
2023-11-15 00:00:00: 0.0038190269842743874
2023-11-16 00:00:00: 0.0038190269842743874
Predictions for Peas (green, dry) in 2025:
2023-11-15 00:00:00: Not enough historical data for prediction
2023-11-16 00:00:00: Not enough historical data for predictio

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional

# Order the Ukraine rows chronologically.
ukraine_df = ukraine_df.sort_values('date')

# Min-max scale the USD prices into [0, 1]; `scaler` is kept so predictions
# can be mapped back to USD afterwards.
scaler = MinMaxScaler(feature_range=(0, 1))
ukraine_price_column = ukraine_df['usdprice'].values.reshape(-1, 1)
ukraine_df['usdprice_scaled'] = scaler.fit_transform(ukraine_price_column)

def create_sequences(data, seq_length):
    """Slice a 1-D series into sliding windows and their next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``; there are ``len(data) - seq_length`` such pairs.

    Returns a tuple ``(X, y)`` of NumPy arrays. When ``data`` has no more than
    ``seq_length`` elements both arrays are empty.
    """
    starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)

seq_length = 10

# Build the training windows one commodity at a time. The frame is sorted by
# date only, so consecutive rows belong to different commodities; windowing the
# whole column would train on mixed-commodity sequences, while the prediction
# loop below feeds the model the last `seq_length` prices of a single commodity.
window_parts, target_parts = [], []
for _, commodity_rows in ukraine_df.groupby('commodity', sort=False):
    X_part, y_part = create_sequences(commodity_rows['usdprice_scaled'].values, seq_length)
    if len(X_part):  # commodities with <= seq_length rows yield no windows
        window_parts.append(X_part)
        target_parts.append(y_part)

if not window_parts:
    raise ValueError("No commodity has more than seq_length observations to train on")

X = np.concatenate(window_parts)
y = np.concatenate(target_parts)

# Add the trailing feature axis: (samples, seq_length, 1).
X = X.reshape(X.shape[0], seq_length, 1)

# NOTE(review): the held-out split is created but never evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Two stacked bidirectional LSTM layers followed by a single regression output.
model = Sequential([
    Bidirectional(LSTM(50, return_sequences=True), input_shape=(seq_length, 1)),
    Bidirectional(LSTM(50, return_sequences=False)),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# NOTE(review): names and printed labels say 2025 but the dates are in 2023 — confirm intent.
future_dates_2025 = pd.date_range(start='2023-11-15', end='2023-11-16')

ukraine_foods = ukraine_df['commodity'].unique()

# {commodity: {date: predicted USD price, or None when history is too short}}
future_predictions_2025 = {}

for food in ukraine_foods:
    future_predictions_2025[food] = {}
    food_rows = ukraine_df[ukraine_df['commodity'] == food]
    for future_date in future_dates_2025:
        # Only observations dated on or before the target date are used.
        historical_data = food_rows[food_rows['date'] <= future_date]

        if len(historical_data) >= seq_length:
            input_sequence = historical_data.tail(seq_length)['usdprice_scaled'].values
            input_sequence = input_sequence.reshape(1, seq_length, 1)

            future_price_scaled = model.predict(input_sequence)[0][0]
            # Undo the min-max scaling so the stored value is a USD price.
            future_price = scaler.inverse_transform([[future_price_scaled]])[0][0]

            future_predictions_2025[food][future_date] = future_price
        else:
            future_predictions_2025[food][future_date] = None


for food, predictions in future_predictions_2025.items():
    print(f"Predictions for {food} in 2025:")
    for future_date, future_price in predictions.items():
        if future_price is not None:
            print(f"{future_date}: {future_price}")
        else:
            print(f"{future_date}: Not enough historical data for prediction")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Predictions for Meat (beef) in 2025:
2023-11-15 00:00:00: 2.170642958574295
2023-11-16 00:00:00: 2.170642958574295
Predictions for Bread (wheat) in 2025:
2023-11-15 00:00:00: 1.930396490430832
2023-11-16 00:00:00: 1.930396490430832
Predictions for Meat (pork) in 2025:
2023-11-15 00:00:00: 2.331911262598038
2023-11-16 00:00:00: 2.331911262598038
Predictions for Sour cream in 2025:
2023-11-15 00:00:00: 2.1431699010181426
2023-11-16 00:00:00: 2.1431699010181426
Predictions for Sugar in 2025:
2023-11-15 00:00:00: 1.9433474968624116
2023-11-16 00:00:00: 1.9433474968624116
Predictions for Oil (sunflower) in 2025:
2023-11-15 00:00:00: 2.0428315559959414
2023-11-16 00:00:00: 2.0428315559959414
Predictions for Cabbage in 2025:
2023-11-15 00:00:00: 1.872241140356064
2023-11-16 00:00:00: 1.872241140356064
Predictions for Bread (rye) in 2025:
2023-11-15 00:00:00: 1.9462155433082582
2023-1