In [1]:
# import your libraries here
import pandas as pd
import keras
import numpy as np
import multiprocessing

# Importing utility functions from Keras
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Input, InputLayer, Normalization
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error


2023-04-07 16:12:48.341137: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
data = pd.read_csv("processed_data.csv")
# print(data["Year"].value_counts())

In [3]:
from time import time_ns

def get_X_and_y(df : pd.DataFrame, y_col_name:str) -> tuple[np.ndarray,np.ndarray]:
    X = df.copy()
    X.pop("Country")
    y = X.pop(y_col_name) 
    return X.__array__().astype("float32"), y.__array__().astype("float32")

def data_generator(X, y, batch_size: int) -> tuple[list,list]:
    '''
    Returns data generator to be used by feed_forward
    https://wiki.python.org/moin/Generators
    https://realpython.com/introduction-to-python-generators/
    
    Yields batches of embeddings and labels to go with them.
    Use one hot vectors to encode the labels 
    (see the to_categorical function)
    
    '''
    i = 0
    for i in range(0,len(y),batch_size):
        next_i = min(len(y), i+batch_size)
        features = X[i:next_i]
        y_true = y[i:next_i]
        yield features, y_true




TypeError: 'type' object is not subscriptable

In [None]:
# given lines of text (arraylike) and embeddings (model.wv)
def train_model(X,y, learning_rate = 0.01, epochs=100):
    # print(X)
    num_sequences_per_batch = 16 # this is the batch size
    steps_per_epoch = len(X)//num_sequences_per_batch  # Number of batches per epoch
    # train_generator = data_generator(X, y, num_sequences_per_batch)
    # sample=next(train_generator) # this is how you get data out of generators
    # print(sample)

    # Define the model architecture using Keras Sequential API
    model = Sequential()
    # setup normalization layer as input layer - DON'T TOUCH THIS LAYER
    normalizer = Normalization(input_shape=(X.shape[1],),axis=None)
    # adapt for our training input data
    normalizer.adapt(X)
    # add normalization layer to 
    model.add(normalizer)
    # configure and add hidden layers - TWEAK THESE LAYERS (modify, add, or remove)
    model.add(layer_h1 := Dense(2000, activation='relu'))
    model.add(layer_h2 := Dense(1500, activation='relu'))
    model.add(layer_h3 := Dense(1000, activation='relu'))
    model.add(layer_h4 := Dense(500, activation='relu'))
    # add output layer
    model.add(layer_o := Dense(1)) 
    model.summary()

    # setup model for learning
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                loss='mean_absolute_error')
    
    # train the model
    model.fit(x=X, y=y,epochs=epochs)

    # return the model, tokenizer, and embedding dict for sentence generation use later
    return model

In [None]:
# split data into training and test
train_data = data.loc[data["Year"]!=2018,:]
test_data = data.loc[data["Year"]==2018,:]

# then get X and y for both training and test data (numpy arrays)
train_X,train_y = get_X_and_y(train_data,"total_yield")
test_X,test_y = get_X_and_y(test_data,"total_yield")
train_y

In [None]:
m = train_model(train_X, train_y, learning_rate = 0.01, epochs=100)


In [None]:
preds = [arr[0] for arr in m.predict(test_X)]
# for yhat,y in zip(preds,test_y):
    # print(f"predicted: {yhat} , y_true: {y}")
print(mean_absolute_error(y_pred=preds,y_true=test_y))

## HYPER PARAMETER TUNING

In [None]:
#Test out different learning rates
learning_rates = np.arange(0, 1, 0.001)
learning_rates_models = []

for i,rate in enumerate(learning_rates):
    m = train_model(train_X, train_y, learning_rate = rate, epochs=100)
    preds = [arr[0] for arr in m.predict(test_X)]
    learning_rates_models.append(mean_absolute_error(y_pred=preds,y_true=test_y))
    
#Test out different # of epochs
epoch_values_models = []

for i in range(10000):
    m = train_model(train_X, train_y, learning_rate = 0.01, epochs=i)
    preds = [arr[0] for arr in m.predict(test_X)]
    epoch_values_models.append(mean_absolute_error(y_pred=preds,y_true=test_y))
    