In [1]:
import pandas as pd

# Load the datasets
prices_weather_production_df = pd.read_csv('/content/Food_Prices_Kenya.csv')
counties_regions_df = pd.read_csv('/content/Kenyan_Counties_with_Regions.csv')

# Display the first few rows to understand the structure
print(prices_weather_production_df.head())
print(counties_regions_df.head())

# Clean the Food_Prices_Kenya dataset
prices_weather_production_df = prices_weather_production_df.loc[:, ~prices_weather_production_df.columns.str.contains('^Unnamed')]
prices_weather_production_df = prices_weather_production_df.drop(0)
prices_weather_production_df = prices_weather_production_df.dropna()

# Display the cleaned data
print(prices_weather_production_df.head())


       Date     Month   Regions   Commodity   Unit   Priceflag   Pricetype  \
0     #date    #month  #regions  #commodity  #unit  #priceflag  #pricetype   
1  2006       January     Coast       Maize     KG      actual      Retail   
2  2006      February     Coast       Maize     KG      actual      Retail   
3  2006         March     Coast       Maize     KG      actual      Retail   
4  2006         April     Coast       Maize     KG      actual      Retail   

    Currency   Price   Usdprice  ... Unnamed: 19 Unnamed: 20 Unnamed: 21  \
0  #currency  #price  #usdprice  ...         NaN         NaN         NaN   
1        KES   16.13     0.2235  ...         NaN         NaN         NaN   
2        KES   15.93     0.2208  ...         NaN         NaN         NaN   
3        KES   16.03     0.2221  ...         NaN         NaN         NaN   
4        KES   16.63     0.2305  ...         NaN         NaN         NaN   

   Unnamed: 22  Unnamed: 23  Unnamed: 24  Unnamed: 25  Unnamed: 26  \
0   

In [2]:
# Merge the datasets
merged_df = pd.merge(prices_weather_production_df, counties_regions_df, left_on='Regions', right_on='Region', how='left')
merged_df = merged_df.drop(columns=['Region'])

# Display the merged data
print(merged_df.head())

       Date    Month Regions Commodity Unit Priceflag Pricetype Currency  \
0  2006      January   Coast     Maize   KG    actual    Retail      KES   
1  2006      January   Coast     Maize   KG    actual    Retail      KES   
2  2006      January   Coast     Maize   KG    actual    Retail      KES   
3  2006      January   Coast     Maize   KG    actual    Retail      KES   
4  2006      January   Coast     Maize   KG    actual    Retail      KES   

   Price Usdprice Amount Produced Annual Rainfall Annual Temperature  \
0  16.13   0.2235          9741.6              23                 27   
1  16.13   0.2235          9741.6              23                 27   
2  16.13   0.2235          9741.6              23                 27   
3  16.13   0.2235          9741.6              23                 27   
4  16.13   0.2235          9741.6              23                 27   

       County  
0     Mombasa  
1       Kwale  
2      Kilifi  
3  Tana River  
4        Lamu  


In [4]:
from sklearn.preprocessing import StandardScaler

# Convert date-related columns to datetime format
merged_df['Date'] = pd.to_datetime(merged_df['Date'], errors='coerce')
merged_df['Year'] = merged_df['Date'].dt.year
merged_df['Month'] = merged_df['Date'].dt.month

# Replace commas in numeric columns and convert to numeric type
for col in ['Price', 'Usdprice', 'Amount Produced', 'Annual Rainfall', 'Annual Temperature']:
    merged_df[col] = merged_df[col].str.replace(',', '', regex=True).astype(float)

# Normalize or scale numeric features
scaler = StandardScaler()
merged_df[['Price', 'Usdprice', 'Amount Produced', 'Annual Rainfall', 'Annual Temperature']] = scaler.fit_transform(
    merged_df[['Price', 'Usdprice', 'Amount Produced', 'Annual Rainfall', 'Annual Temperature']])

# Display the normalized data
print(merged_df.head())

        Date  Month Regions Commodity Unit Priceflag Pricetype Currency  \
0 2006-01-01      1   Coast     Maize   KG    actual    Retail      KES   
1 2006-01-01      1   Coast     Maize   KG    actual    Retail      KES   
2 2006-01-01      1   Coast     Maize   KG    actual    Retail      KES   
3 2006-01-01      1   Coast     Maize   KG    actual    Retail      KES   
4 2006-01-01      1   Coast     Maize   KG    actual    Retail      KES   

      Price  Usdprice  Amount Produced  Annual Rainfall  Annual Temperature  \
0 -1.092889 -1.035701        -0.836564        -1.579464            0.250846   
1 -1.092889 -1.035701        -0.836564        -1.579464            0.250846   
2 -1.092889 -1.035701        -0.836564        -1.579464            0.250846   
3 -1.092889 -1.035701        -0.836564        -1.579464            0.250846   
4 -1.092889 -1.035701        -0.836564        -1.579464            0.250846   

       County  Year  
0     Mombasa  2006  
1       Kwale  2006  
2      K

In [5]:
from sklearn.model_selection import train_test_split

# Define the feature set and target variable for price prediction
X_price = merged_df[['Year', 'Month', 'Amount Produced', 'Annual Rainfall', 'Annual Temperature']]
y_price = merged_df['Price']

# Define the feature set and target variables for weather prediction
X_weather = merged_df[['Year', 'Month']]
y_rainfall = merged_df['Annual Rainfall']
y_temperature = merged_df['Annual Temperature']

# Split the data into training and testing sets
X_train_price, X_test_price, y_train_price, y_test_price = train_test_split(X_price, y_price, test_size=0.2, random_state=42)
X_train_weather, X_test_weather, y_train_rainfall, y_test_rainfall, y_train_temperature, y_test_temperature = train_test_split(
    X_weather, y_rainfall, y_temperature, test_size=0.2, random_state=42)

# Display the shapes of the training and testing sets
print(X_train_price.shape, X_test_price.shape, y_train_price.shape, y_test_price.shape)
print(X_train_weather.shape, X_test_weather.shape, y_train_rainfall.shape, y_test_rainfall.shape, y_train_temperature.shape, y_test_temperature.shape)


(7891, 5) (1973, 5) (7891,) (1973,)
(7891, 2) (1973, 2) (7891,) (1973,) (7891,) (1973,)


In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# ANN model for maize price prediction
def build_price_model():
    model = Sequential()
    model.add(Dense(64, input_dim=X_train_price.shape[1], activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
    return model

# ANN model for weather prediction
def build_weather_model():
    model = Sequential()
    model.add(Dense(64, input_dim=X_train_weather.shape[1], activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(2))  # Predicting both rainfall and temperature
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
    return model

# Build models
price_model = build_price_model()
weather_model = build_weather_model()

# Display model summaries
price_model.summary()
weather_model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Train the price model
price_history = price_model.fit(X_train_price, y_train_price, epochs=50, batch_size=10, validation_split=0.2)

# Train the weather model
weather_history = weather_model.fit(X_train_weather, [y_train_rainfall, y_train_temperature], epochs=50, batch_size=10, validation_split=0.2)


Epoch 1/50
[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 4480.0273 - mean_absolute_error: 25.2382 - val_loss: 1.1749 - val_mean_absolute_error: 0.9348
Epoch 2/50
[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.0922 - mean_absolute_error: 0.8124 - val_loss: 0.8914 - val_mean_absolute_error: 0.7599
Epoch 3/50
[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.0445 - mean_absolute_error: 0.7908 - val_loss: 0.9905 - val_mean_absolute_error: 0.7097
Epoch 4/50
[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.2783 - mean_absolute_error: 0.8821 - val_loss: 1.4082 - val_mean_absolute_error: 0.8726
Epoch 5/50
[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 1.3601 - mean_absolute_error: 0.9107 - val_loss: 1.2381 - val_mean_absolute_error: 0.8058
Epoch 6/50
[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m

In [8]:
# Evaluate the price model
price_loss, price_mae = price_model.evaluate(X_test_price, y_test_price)
print(f'Price Model - Loss: {price_loss}, MAE: {price_mae}')

# Evaluate the weather model
weather_loss, weather_mae = weather_model.evaluate(X_test_weather, [y_test_rainfall, y_test_temperature])
print(f'Weather Model - Loss: {weather_loss}, MAE: {weather_mae}')


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4.7761 - mean_absolute_error: 2.0364
Price Model - Loss: 4.785111904144287, MAE: 2.0357186794281006
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1183 - mean_absolute_error: 0.8558
Weather Model - Loss: 1.0800492763519287, MAE: 0.8358787298202515
