In [None]:
# Import dependencies
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder


In [None]:
# Report the TensorFlow release available in this runtime
tf_version = tf.__version__
print("TensorFlow version:", tf_version)

In [None]:
# Mount Google Drive inside the Colab runtime so files stored there can be read
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Load df_cleaned.csv from the mounted Google Drive into a DataFrame, then display it
cleaned_csv_path = "/content/drive/MyDrive/Project_4/df_cleaned.csv"
df = pd.read_csv(cleaned_csv_path)
df

In [None]:
# List every distinct value that appears in the BASIN_CATEGORY column
basin_categories = df['BASIN_CATEGORY']
unique_values = basin_categories.unique().tolist()
print(unique_values)


In [None]:
# Keep only the rows of df whose BASIN_CATEGORY is 'Gulf Coast' and store them as df_gulf
is_gulf_coast = df["BASIN_CATEGORY"].eq("Gulf Coast")
df_gulf = df.loc[is_gulf_coast]
df_gulf


In [None]:
# Count the missing values in each column of the Gulf Coast subset
df_gulf.isna().sum()


In [None]:
# Restrict df_gulf to its numeric columns; every non-numeric column is discarded
df_gulf = df_gulf.select_dtypes(include="number")

# Display the numeric-only DataFrame
df_gulf


In [None]:
# Count the missing values in each remaining (numeric) column of df_gulf
df_gulf.isna().sum()

In [None]:
# Build df_gulf_clean from the modelling columns of df_gulf:
# PH, HCO3, Ca, Cl, K, Li, Mg, Na, SO4 and CHARGEBAL.
# .copy() makes df_gulf_clean an independent frame instead of a selection of
# df_gulf, so later cleaning cells can modify it without pandas raising
# SettingWithCopyWarning and without any ambiguity about touching df_gulf.
model_columns = ["PH", "HCO3", "Ca", "Cl", "K", "Li", "Mg", "Na", "SO4", "CHARGEBAL"]
df_gulf_clean = df_gulf[model_columns].copy()
df_gulf_clean


In [None]:
# Count the missing values in each column of df_gulf_clean
df_gulf_clean.isna().sum()

In [None]:
# Drop every ROW that has a null in any column other than "Li"
# (rows whose only missing value is "Li" are kept).
# The result is reassigned rather than using inplace=True, so this cell never
# mutates a frame that was derived by selecting columns from df_gulf.
required_columns = df_gulf_clean.columns.difference(['Li']).tolist()
df_gulf_clean = df_gulf_clean.dropna(axis=0, how='any', subset=required_columns)


In [None]:
# Check data types
df_gulf_clean.dtypes

In [None]:
# Replace the remaining null values with 0 (reassigned instead of inplace=True
# so the cell returns a new frame rather than mutating a derived one).
# NOTE(review): the preceding dropna removes rows with nulls in every column
# except "Li", so the only values filled here are missing "Li" entries, i.e. the
# regression target. Confirm that 0 is the intended stand-in for an unmeasured
# "Li" value rather than dropping those rows.
df_gulf_clean = df_gulf_clean.fillna(0)

In [None]:
# Separate the target ("Li") from the features (all remaining columns)
y = df_gulf_clean['Li']
X = df_gulf_clean.drop('Li', axis=1)

In [None]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)


In [None]:
# Standardize the features: fit the scaler on the training split only, then
# apply that same fitted transformation to both the training and test splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Build the regression network: two ReLU hidden layers (64 and 32 units)
# followed by a single output unit that predicts the target.
n_features = X_train_scaled.shape[1]
nn_model = tf.keras.Sequential([
    # Declare the input with an explicit Input layer; passing `input_shape=` to
    # the first Dense layer is deprecated in current Keras releases.
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # Linear output (no activation) is the standard choice for regression. A ReLU
    # on a single output unit can get stuck emitting 0 with zero gradient, which
    # stops the model from learning at all.
    tf.keras.layers.Dense(1)
])

In [None]:
# Compile the model for regression: Adam optimizer, mean squared error loss.
# Mean absolute error is tracked as the human-readable metric; classification
# "accuracy" is not meaningful for a continuous target.
nn_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [None]:
# Print a summary of the Sequential model to check its structure
nn_model.summary()

In [None]:
# Train for 50 epochs in mini-batches of 32, holding back 10% of the training
# data for validation; the returned training record is kept in `history`
history = nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.1,
)

In [None]:
# Evaluate the trained model on the held-out test split.
# Because the model is compiled with metrics, evaluate() returns the loss AND
# each metric; return_dict=True labels every value by name so the loss is not
# printed as an unlabeled list.
test_metrics = nn_model.evaluate(X_test_scaled, y_test, return_dict=True)
loss = test_metrics['loss']
print(f'Test Loss: {loss}')
# Report any additional compiled metrics alongside the loss
for metric_name, metric_value in test_metrics.items():
    if metric_name != 'loss':
        print(f'Test {metric_name}: {metric_value}')