# Getting Started: Market Research
This Jupyter notebook is a quick demonstration on how to get started on the market research section.

## 1) Download Data
Please download the train and test data and place it within the ./research/data path. If you've placed it in the correct place, you should see the following cell work:

In [None]:
%pip install tensorflow

In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import r2_score
import lightgbm as lgb


train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')
train_new = pd.read_csv('./data/train_new.csv')
test_new = pd.read_csv('./data/test_new.csv')

train = pd.concat([train, train_new], axis=1)
test = pd.concat([test, test_new], axis=1)
train = train.fillna(train.mean())
test = test.fillna(test.mean())

features = [c for c in train.columns if c not in ["Y1", "Y2"]]

# X = train[features].values
# X_test = test[features].values
y2 = train["Y2"].values

In [25]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input

from sklearn.model_selection import train_test_split

scaler = StandardScaler()
X = scaler.fit_transform(train[features].values)
X_test = scaler.transform(test[features].values)

n_features = X.shape[1]

mlp_model = Sequential([
    Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1)
])


mlp_model.compile(optimizer='adam', loss='mse')

X_train, X_val, y2_train, y2_val = train_test_split(X, y2, test_size=0.2, random_state=42, shuffle = True)
# split = int(0.8 * len(X))
# X_train, X_val = X[:split], X[split:]
# y2_train, y2_val = y2[:split], y2[split:]
#50 and 64
hist = mlp_model.fit(X_train, y2_train, 
                     epochs=200, 
                     batch_size=128, 
                     validation_data=(X_val, y2_val), 
                     verbose=1)

y2_pred = mlp_model.predict(X_val)
r2 = r2_score(y2_val, y2_pred)
print(f"Validation R² Score: {r2}")

Epoch 1/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.3964 - val_loss: 0.3005
Epoch 2/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3068 - val_loss: 0.2484
Epoch 3/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2856 - val_loss: 0.2523
Epoch 4/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2778 - val_loss: 0.2370
Epoch 5/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2677 - val_loss: 0.2283
Epoch 6/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2631 - val_loss: 0.2156
Epoch 7/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.2492 - val_loss: 0.2315
Epoch 8/200
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.2589 - val_loss: 0.2377
Epoch 9/200
[1m500/500[0m [32