In [274]:
# TensorFlow and tf.keras
import tensorflow as tf

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.preprocessing import LabelEncoder



# PreProcessing Data

In [275]:

file_path = 'finalMergedData.csv'
df = pd.read_csv(file_path)

Delete the features that we are able to acess before the play starts, including passResult, passLength, penaltyYards, playNullifiedByPenalty, passPossibility

In [276]:
# Add features to delete
features_to_delete = ['passResult', 'passProbability','prePenaltyPlayResult', 'penaltyYards', 'playNullifiedByPenalty', 'foulName1', 'foulName2', 'foulNFLId1', 'foulNFLId2', 'homeTeamWinProbabilityAdded', 'visitorTeamWinProbilityAdded','expectedPointsAdded', 'expectedPoints' ]
# Drop the unwanted features
df = df.drop(features_to_delete, axis=1)

In [277]:
# delete a few descriptive features:
Features_to_delete = ['playDescription', 'gameId', 'ballCarrierId', 'ballCarrierDisplayName', 'collegeName', 'Full Name', 'Team']
df = df.drop(Features_to_delete, axis=1)

In [278]:
column_data_types = df.dtypes
print(column_data_types)
df

playId                                int64
quarter                               int64
down                                  int64
yardsToGo                             int64
possessionTeam                       object
defensiveTeam                        object
yardlineSide                         object
yardlineNumber                        int64
gameClock                            object
preSnapHomeScore                      int64
preSnapVisitorScore                   int64
passLength                          float64
playResult                            int64
absoluteYardlineNumber                int64
offenseFormation                     object
defendersInTheBox                   float64
preSnapHomeTeamWinProbability       float64
preSnapVisitorTeamWinProbability    float64
frameId                               int64
playDirection                        object
x                                   float64
y                                   float64
distanceToEndZone               

Unnamed: 0,playId,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,gameClock,preSnapHomeScore,...,x,y,distanceToEndZone,height_x,weight_x,Position,yearsPro,overall_rating,teamId,age
0,3537,4,1,10,ATL,TB,ATL,41,7:52,21,...,63.87,29.23,63.87,6-3,261,TE,2.0,52.0,14.0,27.0
1,121,1,3,4,ATL,NO,ATL,42,13:41,0,...,52.77,26.42,67.23,6-3,261,TE,2.0,52.0,14.0,27.0
2,1217,2,4,1,ATL,LA,LA,13,5:30,14,...,101.06,8.47,18.94,6-3,261,TE,2.0,52.0,14.0,27.0
3,749,1,2,14,ATL,CLE,CLE,35,2:11,3,...,91.15,51.02,28.85,6-3,261,TE,2.0,52.0,14.0,27.0
4,959,2,2,7,ATL,CAR,ATL,49,13:40,0,...,66.42,39.05,53.58,6-3,261,TE,2.0,52.0,14.0,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11343,3088,3,2,6,BUF,KC,BUF,29,2:05,17,...,38.12,19.84,81.88,6-5,251,TE,3.0,64.0,3.0,27.0
11344,2612,3,1,10,DEN,IND,DEN,25,8:51,6,...,93.12,23.99,93.12,6-0,225,HB,3.0,63.0,27.0,25.0
11345,1204,2,2,7,DAL,NYG,NYG,14,9:58,3,...,99.29,14.20,20.71,6-3,195,WR,0.0,71.0,11.0,23.0
11346,2064,3,2,10,IND,JAX,IND,34,8:56,17,...,47.00,15.72,73.00,6-4,220,WR,2.0,64.0,10.0,24.0


In [279]:
# Combine all unique values from the relevant columns into one list
unique_teams = pd.unique(df[['possessionTeam', 'defensiveTeam', 'yardlineSide']].values.ravel('K'))

# Create a label encoder object
label_encoder = LabelEncoder()

# Fit the label encoder on the unique values
label_encoder.fit(unique_teams)

# Transform each column with the fitted label encoder
df['possessionTeam'] = label_encoder.transform(df['possessionTeam'])
df['defensiveTeam'] = label_encoder.transform(df['defensiveTeam'])
df['yardlineSide'] = label_encoder.transform(df['yardlineSide'])
df['offenseFormation'] = label_encoder.fit_transform(df['offenseFormation'])
df['Position'] = label_encoder.fit_transform(df['Position'])
df['playDirection'] = label_encoder.fit_transform(df['playDirection'])

In [280]:
df

Unnamed: 0,playId,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,gameClock,preSnapHomeScore,...,x,y,distanceToEndZone,height_x,weight_x,Position,yearsPro,overall_rating,teamId,age
0,3537,4,1,10,1,29,1,41,7:52,21,...,63.87,29.23,63.87,6-3,261,7,2.0,52.0,14.0,27.0
1,121,1,3,4,1,22,1,42,13:41,0,...,52.77,26.42,67.23,6-3,261,7,2.0,52.0,14.0,27.0
2,1217,2,4,1,1,16,16,13,5:30,14,...,101.06,8.47,18.94,6-3,261,7,2.0,52.0,14.0,27.0
3,749,1,2,14,1,7,7,35,2:11,3,...,91.15,51.02,28.85,6-3,261,7,2.0,52.0,14.0,27.0
4,959,2,2,7,1,4,1,49,13:40,0,...,66.42,39.05,53.58,6-3,261,7,2.0,52.0,14.0,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11343,3088,3,2,6,3,15,3,29,2:05,17,...,38.12,19.84,81.88,6-5,251,7,3.0,64.0,3.0,27.0
11344,2612,3,1,10,9,13,9,25,8:51,6,...,93.12,23.99,93.12,6-0,225,2,3.0,63.0,27.0,25.0
11345,1204,2,2,7,8,23,23,14,9:58,3,...,99.29,14.20,20.71,6-3,195,8,0.0,71.0,11.0,23.0
11346,2064,3,2,10,13,14,13,34,8:56,17,...,47.00,15.72,73.00,6-4,220,8,2.0,64.0,10.0,24.0


In [281]:


# df = pd.get_dummies(df, columns=['possessionTeam', 'defensiveTeam', 'yardlineSide', 'offenseFormation', 'Position', 'playDirection'])
def clock_to_minutes(time_str):
    hours, minutes = map(int, time_str.split(':'))
    return hours * 60 + minutes
def height_to_cm(height):
    feet, inches = height.split('-')
    return int(feet) * 30.48 + int(inches) * 2.54
# Apply this function to each entry in the 'gameClock' column
df['gameClock'] = df['gameClock'].apply(clock_to_minutes)
df['height_x'] = df['height_x'].apply(height_to_cm)

df

Unnamed: 0,playId,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,gameClock,preSnapHomeScore,...,x,y,distanceToEndZone,height_x,weight_x,Position,yearsPro,overall_rating,teamId,age
0,3537,4,1,10,1,29,1,41,472,21,...,63.87,29.23,63.87,190.50,261,7,2.0,52.0,14.0,27.0
1,121,1,3,4,1,22,1,42,821,0,...,52.77,26.42,67.23,190.50,261,7,2.0,52.0,14.0,27.0
2,1217,2,4,1,1,16,16,13,330,14,...,101.06,8.47,18.94,190.50,261,7,2.0,52.0,14.0,27.0
3,749,1,2,14,1,7,7,35,131,3,...,91.15,51.02,28.85,190.50,261,7,2.0,52.0,14.0,27.0
4,959,2,2,7,1,4,1,49,820,0,...,66.42,39.05,53.58,190.50,261,7,2.0,52.0,14.0,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11343,3088,3,2,6,3,15,3,29,125,17,...,38.12,19.84,81.88,195.58,251,7,3.0,64.0,3.0,27.0
11344,2612,3,1,10,9,13,9,25,531,6,...,93.12,23.99,93.12,182.88,225,2,3.0,63.0,27.0,25.0
11345,1204,2,2,7,8,23,23,14,598,3,...,99.29,14.20,20.71,190.50,195,8,0.0,71.0,11.0,23.0
11346,2064,3,2,10,13,14,13,34,536,17,...,47.00,15.72,73.00,193.04,220,8,2.0,64.0,10.0,24.0


In [282]:
df['defendersInTheBox'] = df['defendersInTheBox'].fillna(df['defendersInTheBox'].mean())
df['passLength'] = df['passLength'].fillna(df['defendersInTheBox'].mean())
# Check if any row has at least one missing value
# Check if any row has at least one missing value
rows_with_missing_values = df.isnull().any(axis=1)
sum(rows_with_missing_values)
scaler = StandardScaler()

# Fit the scaler to the data and transform the data
categorical_features = ['possessionTeam', 'defensiveTeam', 'yardlineSide', 'offenseFormation', 'Position', 'playDirection']

# List of columns to exclude from standardization (including target and any non-features)
excluded_columns = ['playResult', 'passLength'] 

# Creating the list of columns to standardize by excluding the specified columns
columns_to_standardize = [col for col in df.columns if col not in excluded_columns]

# Subset the DataFrame to include only the columns to standardize
df_subset = df[columns_to_standardize]
df_standardized = scaler.fit_transform(df_subset)
df_subset = pd.DataFrame(df_standardized, columns=df_subset.columns)
df[columns_to_standardize] = df_subset
df

Unnamed: 0,playId,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,gameClock,preSnapHomeScore,...,x,y,distanceToEndZone,height_x,weight_x,Position,yearsPro,overall_rating,teamId,age
0,1.323072,1.274946,-0.918198,0.397173,-1.483311,1.471248,-1.512065,0.915164,0.113727,1.025407,...,0.143207,0.247832,0.027177,0.977365,2.277991,1.030017,-0.669202,-3.014546,-0.201423,0.548816
1,-1.588305,-1.361688,1.588349,-1.126446,-1.483311,0.718273,-1.512065,0.995623,1.416317,-1.193672,...,-0.310287,-0.046835,0.165632,0.977365,2.277991,1.030017,-0.669202,-3.014546,-0.201423,0.548816
2,-0.654210,-0.482810,2.841622,-1.888255,-1.483311,0.072865,0.065051,-1.337696,-0.416266,0.285714,...,1.662616,-1.929143,-1.824240,0.977365,2.277991,1.030017,-0.669202,-3.014546,-0.201423,0.548816
3,-1.053075,-1.361688,0.335075,1.412919,-1.483311,-0.895246,-0.881219,0.432408,-1.159003,-0.876660,...,1.257740,2.532818,-1.415882,0.977365,2.277991,1.030017,-0.669202,-3.014546,-0.201423,0.548816
4,-0.874097,-0.482810,0.335075,-0.364636,-1.483311,-1.217950,-1.512065,1.558838,1.412585,-1.193672,...,0.247388,1.277596,-0.396840,0.977365,2.277991,1.030017,-0.669202,-3.014546,-0.201423,0.548816
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11343,0.940400,0.396068,0.335075,-0.618573,-1.270331,-0.034703,-1.301783,-0.050347,-1.181397,0.602726,...,-0.908818,-0.736840,0.769310,1.743698,1.760168,1.030017,-0.257731,-1.728845,-1.397013,0.548816
11344,0.534716,0.396068,-0.918198,0.397173,-0.631393,-0.249839,-0.670937,-0.372184,0.333936,-0.559649,...,1.338225,-0.301655,1.232473,-0.172135,0.413829,-0.793824,-0.257731,-1.835987,1.211546,-0.221733
11345,-0.665290,-0.482810,0.335075,-0.364636,-0.737883,0.825841,0.801038,-1.257237,0.584003,-0.876660,...,1.590302,-1.328273,-1.751304,0.977365,-1.139639,1.394785,-1.492143,-0.978853,-0.527493,-0.992282
11346,0.067668,0.396068,0.335075,0.397173,-0.205435,-0.142271,-0.250372,0.351949,0.352598,0.602726,...,-0.546022,-1.168880,0.403395,1.360531,0.154918,1.394785,-0.669202,-1.728845,-0.636183,-0.607007


In [283]:
df.columns

Index(['playId', 'quarter', 'down', 'yardsToGo', 'possessionTeam',
       'defensiveTeam', 'yardlineSide', 'yardlineNumber', 'gameClock',
       'preSnapHomeScore', 'preSnapVisitorScore', 'passLength', 'playResult',
       'absoluteYardlineNumber', 'offenseFormation', 'defendersInTheBox',
       'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability',
       'frameId', 'playDirection', 'x', 'y', 'distanceToEndZone', 'height_x',
       'weight_x', 'Position', 'yearsPro', 'overall_rating', 'teamId', 'age'],
      dtype='object')

In [284]:
import pandas as pd
import seaborn as sns


# Assuming df is your DataFrame and 'playResult' is your target variable
X = df.drop(['playResult','passLength'], axis=1) # Features
y_final = df['playResult'] # Target variable
y_aux = df['passLength']

# Correlation analysis
# correlation_matrix = df.corr()
# print(correlation_matrix['playResult'].sort_values(ascending=False))
# plt.figure(figsize=(30,20))
# sns.heatmap(correlation_matrix, annot=True, fmt=".2f")
# plt.show()


In [285]:
# frameId is unrealeted, decide to delete it
df = df.drop('frameId', axis=1)

In [286]:
X.dtypes

playId                              float64
quarter                             float64
down                                float64
yardsToGo                           float64
possessionTeam                      float64
defensiveTeam                       float64
yardlineSide                        float64
yardlineNumber                      float64
gameClock                           float64
preSnapHomeScore                    float64
preSnapVisitorScore                 float64
absoluteYardlineNumber              float64
offenseFormation                    float64
defendersInTheBox                   float64
preSnapHomeTeamWinProbability       float64
preSnapVisitorTeamWinProbability    float64
frameId                             float64
playDirection                       float64
x                                   float64
y                                   float64
distanceToEndZone                   float64
height_x                            float64
weight_x                        

# Split data into trainig, validation and test sets

In [287]:

# Let's say you have y_aux as your auxiliary target (intermediary play result)
# and y_final as your final target to predict

# First, you split your data into training and a temporary set (combining validation and test)
X_train, X_temp, y_aux_train, y_aux_temp, y_final_train, y_final_temp = train_test_split(
    X, y_aux, y_final, test_size=0.4, random_state=42
)

# Now split the temporary set further into validation and test sets for both auxiliary and final targets
X_val, X_test, y_aux_val, y_aux_test, y_final_val, y_final_test = train_test_split(
    X_temp, y_aux_temp, y_final_temp, test_size=0.5, random_state=42
)


In [288]:
X_train.dtypes

playId                              float64
quarter                             float64
down                                float64
yardsToGo                           float64
possessionTeam                      float64
defensiveTeam                       float64
yardlineSide                        float64
yardlineNumber                      float64
gameClock                           float64
preSnapHomeScore                    float64
preSnapVisitorScore                 float64
absoluteYardlineNumber              float64
offenseFormation                    float64
defendersInTheBox                   float64
preSnapHomeTeamWinProbability       float64
preSnapVisitorTeamWinProbability    float64
frameId                             float64
playDirection                       float64
x                                   float64
y                                   float64
distanceToEndZone                   float64
height_x                            float64
weight_x                        

# Set up model

In [289]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)


In [290]:
# categorical_features = ['possessionTeam', 'defensiveTeam', 'yardlineSide', 'offenseFormation', 'Position', 'playDirection']

# # Calculate the number of unique categories for each feature
# categorical_feature_sizes = [X_train[feature].nunique() + 1 for feature in categorical_features]  # +1 for zero-indexing
# categorical_indices = [X_train.columns.get_loc(c) for c in categorical_features if c in X_train.columns]
# X_train = df.to_numpy()
# print(categorical_feature_sizes)
# print(categorical_indices)
# print(X_train)

In [292]:
# with tuning
import numpy as np
from keras.models import Model
from keras.layers import Dense, Dropout, Input,BatchNormalization,Embedding,Flatten
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from sklearn.model_selection import KFold
import itertools
import matplotlib.pyplot as plt
# Define the model creation function
def create_model(number_of_features, learning_rate, n_layers, n_units, dropout_rate, activation, dropoutYes, reg_lambda=0.0001):
    inputs = Input(shape=(number_of_features,))
    x = inputs
    x = BatchNormalization()(x)
    for i in range(n_layers):
        x = Dense(n_units, activation=activation)(x)
    if dropoutYes > 0.5:
        x = Dropout(dropout_rate)(x)
    x = BatchNormalization()(x)
    for i in range(n_layers):
        x = Dense(n_units, activation=activation, kernel_regularizer=l2(reg_lambda))(x)
    main_output = Dense(1, name='main_output')(x)
    auxiliary_output = Dense(1, name='aux_output')(x)
    model = Model(inputs=inputs, outputs=[main_output, auxiliary_output])
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss={'main_output': 'mean_squared_error', 'aux_output': 'mean_squared_error'},
                  loss_weights={'main_output': 1.0, 'aux_output': 0.4},
                  metrics={'main_output': 'mean_squared_error', 'aux_output': 'mean_squared_error'})
    return model

# Parameters grid
param_grid = {
    'learning_rate': [0.001, 0.0001],
    'n_layers': [1, 2, 3],
    'n_units': [32, 64, 128],
    'dropout_rate': [0.0, 0.2],
    'activation': ['relu', 'tanh', 'sigmoid'],
    'dropout_yes':[1]
}

# Generate all combinations of parameters
param_combinations = list(itertools.product(*param_grid.values()))

# Define K-Fold cross-validation
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Define the EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
parameter_labels = []  # To store string representations of parameter sets
avg_validation_losses = [] 

# Perform K-Fold cross-validation
for params in param_combinations:
    print(params)
    lr, n_layers, n_units, dropout_rate, activation,dropoutYes = params
    fold_val_losses = []
    for train_index, val_index in kf.split(X_train):
        # Create and compile a new model with current parameters
        model = create_model(
            number_of_features=X_train.shape[1],
            learning_rate=lr,
            n_layers=n_layers,
            n_units=n_units,
            dropout_rate=dropout_rate,
            dropoutYes=dropoutYes,  
            activation=activation
        )
        # Train and evaluate the model on the current fold
        history = model.fit(
            X_train.iloc[train_index], [y_final_train.iloc[train_index], y_aux_train.iloc[train_index]],
            validation_data=(X_train.iloc[val_index], [y_final_train.iloc[val_index], y_aux_train.iloc[val_index]]),
            epochs=100,
            batch_size=32,
            verbose=0,
            callbacks=[early_stopping]
        )
        # Calculate and store the validation loss for this fold
        
        fold_val_losses.append(np.min(history.history['val_loss']))
    
    # Average validation loss across all folds for the current parameter set

    avg_val_loss = np.mean(fold_val_losses)
    avg_validation_losses.append(avg_val_loss)
    parameter_labels.append(f"LR: {lr}, Layers: {n_layers}, Units: {n_units}, Dropout: {dropout_rate}, Act: {activation}, DropoutYes: {dropoutYes}")
    print(f"Avg. Validation Loss for params {params}: {avg_val_loss}")



(0.001, 1, 32, 0.0, 'relu', 1)
Avg. Validation Loss for params (0.001, 1, 32, 0.0, 'relu', 1): 80.7089828491211
(0.001, 1, 32, 0.0, 'tanh', 1)
Avg. Validation Loss for params (0.001, 1, 32, 0.0, 'tanh', 1): 87.76981811523437
(0.001, 1, 32, 0.0, 'sigmoid', 1)
Avg. Validation Loss for params (0.001, 1, 32, 0.0, 'sigmoid', 1): 88.5188201904297
(0.001, 1, 32, 0.2, 'relu', 1)
Avg. Validation Loss for params (0.001, 1, 32, 0.2, 'relu', 1): 87.6196060180664
(0.001, 1, 32, 0.2, 'tanh', 1)
Avg. Validation Loss for params (0.001, 1, 32, 0.2, 'tanh', 1): 88.26673889160156
(0.001, 1, 32, 0.2, 'sigmoid', 1)
Avg. Validation Loss for params (0.001, 1, 32, 0.2, 'sigmoid', 1): 88.86757354736328
(0.001, 1, 64, 0.0, 'relu', 1)
Avg. Validation Loss for params (0.001, 1, 64, 0.0, 'relu', 1): 83.21631622314453
(0.001, 1, 64, 0.0, 'tanh', 1)
Avg. Validation Loss for params (0.001, 1, 64, 0.0, 'tanh', 1): 84.50328521728515
(0.001, 1, 64, 0.0, 'sigmoid', 1)
Avg. Validation Loss for params (0.001, 1, 64, 0.0, '

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout,BatchNormalization
from keras.optimizers import Adam
from keras import regularizers
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV, KFold
from keras.callbacks import EarlyStopping

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='mean_squared_error', patience=5, restore_best_weights=True)

def create_model(input_shape=(X_train.shape[1],), learning_rate = 0.01, nlayers = 3, nunits = 32, dropout_rate = 0.2, activation = 'relu',dropoutYes = 0,reg_lambda=0.0001):
    
    model = Sequential()
    model.add(Dense(nunits,activation = activation,input_shape = input_shape))
    model.add(BatchNormalization())
    for i in range(nlayers):
        model.add(Dense(nunits,activation = activation))
        

    if dropoutYes > 0.5:
        model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())
    
    for i in range(nlayers):
        model.add(Dense(nunits,activation = activation),kernel_regularizer=l2(reg_lambda))
    
    model.add(Dense(1,activation='linear'))

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    
    return model

model = KerasRegressor(model=create_model, verbose=0,callbacks=early_stopping)
param_grid = {
    'model__activation': ['relu', 'tanh', 'sigmoid'],
    'model__learning_rate': [0.01, 0.001, 0.0001],
    'model__nlayers':[2,3,4],
    'model__nunits':[32,64,128],
    'model__dropoutYes':[0,1],
    'epochs': [50, 100, 200],
    'batch_size': [16, 32]
}
random_search = RandomizedSearchCV(model, param_distributions=param_grid, cv=KFold(5), random_state=42,n_jobs=-1)

random_search_results = random_search.fit(X_train, y_final_train)
print("Best Score:", random_search_results.best_score_)
print("Best Params:", random_search_results.best_params_)

ValueError: Found input variables with inconsistent numbers of samples: [6808, 2]

In [None]:
best_model = random_search_results.best_estimator_
predictions = best_model.predict(X_val)
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_final_val, predictions)
print("Mean absolute error on validation Data:", mae)

Mean absolute error on validation Data: 4.779473361359819


In [None]:
print("X_train shape:", X_train.shape)
print("y_final_train shape:", y_final_train.shape)
print("y_aux_train shape:", y_aux_train.shape)


X_train shape: (6808, 29)
y_final_train shape: (6808,)
y_aux_train shape: (6808,)


In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout,BatchNormalization
from keras.optimizers import Adam
from keras import regularizers
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV, KFold

# Model building function adapted for regression with specified input shape
def create_model(learning_rate=0.01, activation='relu', input_shape=(10,)):  # Assuming input_shape is 10 for this example
    model = Sequential([
        Dense(128, activation=activation, input_shape=input_shape, kernel_regularizer=regularizers.l2(1e-5)),
        Dropout(0.5),
        BatchNormalization(),
        Dense(128, activation=activation, kernel_regularizer=regularizers.l2(1e-5)),
        Dropout(0.5),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    return model

# Wrap the model into Scikeras KerasRegressor
model = KerasRegressor(model=create_model, verbose=0)

# Setup parameter grid
param_grid = {
    'model__activation': ['relu', 'tanh', 'sigmoid', 'linear'],
    'model__learning_rate': [0.01, 0.001, 0.0001]
}

# Setup randomized search
random_search = RandomizedSearchCV(model, param_distributions=param_grid, cv=KFold(10), random_state=42)

# Example data
X_train = np.random.rand(100, 10)
y_train = np.random.rand(100)  # Continuous targets for regression

# Perform random search
random_search_results = random_search.fit(X_train, y_train)

# Access the best model, parameters, and score
print("Best Score:", random_search_results.best_score_)
print("Best Params:", random_search_results.best_params_)




KeyboardInterrupt: 