In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras import Sequential, layers, optimizers
from keras import regularizers
from keras_tuner import Hyperband

# columns_to_load = [
#     "Sector_EI",
#     "Compnay_EI",  # Will rename to "Company_EI" after loading
#     "Sector_EM",
#     "Company_EM",
#     "Sector_IM",
#     "Company_IM",
#     "sector_exposure_id",
#     "Normalized_Stock_Price_Change"
# ]

columns_to_load = [
    "Sector_EI",
    "Compnay_EI",
    "sector_exposure_id",
    "Normalized_Stock_Price_Change"
]


# Load the data
file_path = '/Users/mohanganadal/Data Company/Text Processing/Programs/DocumentProcessor/Source Code/Data-Company/AI/NN_Data_Input/X_Train_Stck_Price_Chg_Pct_Pos.csv'  # Replace with your file path

# file_path = '/Users/mohanganadal/Data Company/Text Processing/Programs/DocumentProcessor/Source Code/Data-Company/AI/NN_Data_Input/X_Train_Stck_Price_Chg_Pct_Full.csv'  # Replace with your file path



risk_dataset = pd.read_csv(file_path, usecols=columns_to_load)

# Rename the column "Compnay_EI" to "Company_EI"
risk_dataset.rename(columns={"Compnay_EI": "Company_EI"}, inplace=True)

X = risk_dataset.drop('Normalized_Stock_Price_Change', axis=1)
y = risk_dataset['Normalized_Stock_Price_Change']

# Standardize the features
standard_scalar = StandardScaler()
X_scaled = standard_scalar.fit_transform(X)
# X_scaled = X
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.33, random_state=33)

# Define the model-building function for hyperparameter tuning


def build_model(hp):
    model = Sequential()
    model.add(layers.Input(shape=(3,)))  # Adjust input shape based on dataset

    # Tune the number of layers and units per layer
    for i in range(hp.Int('num_layers', 2, 4)):  # 2 to 4 hidden layers
        model.add(layers.Dense(
            units=hp.Int(f'units_{i}', min_value=64,
                         max_value=256, step=64),  # Units per layer
            # Activation function
            activation=hp.Choice('activation', ['relu', 'tanh']),
            # L2 Regularization to avoid overfitting
            kernel_regularizer=regularizers.l2(0.01)
        ))

        # Optional: Add Dropout for regularization
        # Dropout with 20% probability to avoid overfitting
        model.add(layers.Dropout(0.2))

    # Output layer with 1 neuron for regression
    # Single output for regression
    model.add(layers.Dense(1, activation='linear'))

    # Compile the model with tunable learning rate
    model.compile(
        optimizer=optimizers.Adam(learning_rate=hp.Choice(
            'learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_squared_error',
        metrics=['mae']
    )

    return model


# Initialize the tuner (Hyperband)
tuner = Hyperband(
    build_model,
    objective='val_mae',  # Minimize validation MAE
    max_epochs=50,        # Maximum number of epochs for tuning
    factor=3,             # Factor for reducing the range of hyperparameters
    directory='hyperparam_tuning',  # Directory to store the results
    project_name='risk_prediction_optimized'
)

# Start the search for the best hyperparameters
tuner.search(X_train, y_train, validation_split=0.2, epochs=25, batch_size=32)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The optimal number of layers is {best_hps.get('num_layers')}.
The optimal units in each layer are {[best_hps.get(f'units_{i}') for i in range(best_hps.get('num_layers'))]}.
The optimal activation function is {best_hps.get('activation')}.
The optimal learning rate is {best_hps.get('learning_rate')}.
""")

# Build the best model with the selected hyperparameters
best_model = tuner.hypermodel.build(best_hps)

# Train the best model on the entire training dataset
history = best_model.fit(
    X_train, y_train, validation_split=0.2, epochs=100, batch_size=32)

# Evaluate the best model on the test set
test_loss, test_mae = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss}")
print(f"Test MAE: {test_mae}")

# Plot the training history (optional)
plt.plot(history.history['mae'], label='Training MAE')
plt.plot(history.history['val_mae'], label='Validation MAE')
plt.xlabel('Epochs')
plt.ylabel('Mean Absolute Error (MAE)')
plt.legend()
plt.show()

In [31]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt


# columns_to_load = [
#     "company_name",
#     "year",
#     "Sector_EI",
#     "Compnay_EI",  # Will rename to "Company_EI" after loading
#     "Sector_EM",
#     "Company_EM",
#     "Sector_IM",
#     "Company_IM",
#     "sector_exposure_id"
# ]

columns_to_load = [
    "company_name",
    "year",
    "Sector_EI",
    "Compnay_EI",
    "sector_exposure_id"
]

# Load the data
test_file_path = '/Users/mohanganadal/Data Company/Text Processing/Programs/DocumentProcessor/Source Code/Data-Company/AI/NN_Data_Input/X_Train_Stck_Price_Chg_Pct_Pos.csv'  # Replace with your file path
df = pd.read_csv(test_file_path, usecols=columns_to_load)

print(df.shape)
# Rename the column "Compnay_EI" to "Company_EI"
df.rename(columns={"Compnay_EI": "Company_EI"}, inplace=True)

company_filter = df["company_name"] == "PIONEER NATURAL RESOURCES"
year_filetr = df["year"] == 2013
sample_df1 = df.where(company_filter).dropna()
sample_df2 = sample_df1.where(year_filetr).dropna()
data_input_nn = sample_df2.drop('company_name', axis=1).drop('year', axis=1)
# print(data_input_nn)

standard_scalar = StandardScaler()
X_scaled = standard_scalar.fit_transform(data_input_nn)

predicted_values = best_model.predict(X_scaled)
print(predicted_values.flatten())

print(predicted_values.flatten().sum())

(497, 5)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[0.3245357  0.10762902 0.11390446 0.18155478 0.12672329 0.09530315
 0.17777896 0.12447013 0.09530315 0.09530315]
1.4425058


In [30]:
best_model.summary()

In [None]:
# Assuming `model` is your MobileNet model
for layer in best_model.layers:
    print(f"Layer Name: {layer.name}")
    weights = layer.get_weights()  # Get weights and biases
    if weights:  # Some layers might not have weights
        print("Weights:", weights[0])  # Kernel weights
        print("Biases:", weights[1] if len(weights) > 1 else None)

In [34]:
import pandas as pd

# Load the data (assuming the data is in a CSV file)
# Replace 'file.csv' with the actual filename
file_path = '/Users/mohanganadal/Data Company/Text Processing/Programs/DocumentProcessor/Source Code/Data-Company/AI/NN_Data_Input/Exp_PCT_CHG_Updated.csv'
df = pd.read_csv(file_path)


# Assuming your dataframe is named df
# First, sort the dataframe by company_name, year, and sector_exposure_path_name
df = df.sort_values(by=['company_name', 'year', 'sector_exposure_path_name'])

# Now, we need to calculate pct_change for each group of company_name and sector_exposure_path_name
grouped_df = df.groupby(['company_name', 'sector_exposure_path_name'])[[
    'Normalized_Stock_Price_Change','Exp_Pct_Chg']].sum()

# The pct_change method calculates the percentage change from the prior row, so no need to manually implement the formula.
# pct_change automatically calculates: (current - prior) / prior * 100

# Optional: Convert the pct_change to percentage format by multiplying by 100, if you need it as a percentage (not a decimal)
# df['Exp_Pct_Chg'] = df['Exp_Pct_Chg'] * 100

# If you want to see the updated dataframe:
print(grouped_df)

# Save the updated dataset
output_file_path = '/Users/mohanganadal/Data Company/Text Processing/Programs/DocumentProcessor/Source Code/Data-Company/AI/NN_Data_Input/Exp_PCT_CHG_Summarized.csv'
grouped_df.to_csv(output_file_path, index=False)

print(f"Updated file saved to {output_file_path}.")

                                                                          Normalized_Stock_Price_Change  \
company_name          sector_exposure_path_name                                                           
ANTERO RESOURCES Corp Associated Energy Costs                                                 14.461116   
                      Dividend/reinvestment balance for long-term growth                       7.882282   
                      GDP and Disposable Income                                               17.955125   
                      Innovation                                                              18.720362   
                      Market Transition                                                       17.958664   
...                                                                                                 ...   
VITAL ENERGY, INC.    Professional Skills Development                                        -11.941073   
                      Provisioning se