In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
# import xgboost as xgb
# import lightgbm as lgb

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [3]:
# Single seed shared by every estimator that has a stochastic component, so that
# the reported metrics are repeatable from one kernel restart to the next.
RANDOM_STATE = 42

# Candidate regressors keyed by the display name used in the printed report.
# LinearRegression and SVR have no random component and therefore take no seed;
# Ridge/Lasso/ElasticNet only use randomness with non-default solver options,
# so they are left at their defaults.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'ElasticNet': ElasticNet(),
    'Decision Tree': DecisionTreeRegressor(random_state=RANDOM_STATE),
    'Extra Trees': ExtraTreesRegressor(random_state=RANDOM_STATE),
    'Gradient Boosting': GradientBoostingRegressor(random_state=RANDOM_STATE),
    'AdaBoost': AdaBoostRegressor(random_state=RANDOM_STATE),
    'SVR': SVR(),
    # max_iter raised from the library default to give the optimiser more epochs.
    'Neural Network': MLPRegressor(max_iter=1000, random_state=RANDOM_STATE),
}

In [4]:
# Loading and preprocessing the data
def load_and_preprocess_data(file_path):
    """Read the CSV at ``file_path`` and normalise its timestamp and missing values.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; it must contain a ``time`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded table with ``time`` parsed as datetimes and every literal
        ``'NaN'`` string replaced by ``np.nan``.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    raw = pd.read_csv(file_path)
    # Values that do not match the expected layout become NaT instead of raising.
    raw['time'] = pd.to_datetime(
        raw['time'],
        errors='coerce',
        format=timestamp_format,
    )

    # Turn any remaining textual 'NaN' markers into real missing values.
    cleaned = raw.replace({'NaN': np.nan})
    return cleaned

# Feature preparation
def prepare_features(df):
    """Build the numeric feature matrix and target vector from the loaded table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the six feature columns listed in ``features`` below and
        a ``WaveHeight`` column.

    Returns
    -------
    X : pandas.DataFrame
        The six feature columns, each converted to a numeric dtype. Values that
        cannot be parsed as numbers become NaN.
    y : pandas.Series
        ``WaveHeight`` converted to numeric, with unparseable values as NaN.

    Notes
    -----
    ``df`` itself is never modified; the features are worked on as a copy.
    Rows containing NaN are *not* dropped here - that is left to the caller.
    """
    features = ['AtmosphericPressure', 'WindDirection', 'WindSpeed', 'Gust', 'AirTemperature', 'SeaTemperature']

    # Explicit copy: assigning into a bare column selection triggers
    # SettingWithCopyWarning and leaves it ambiguous whether ``df`` is touched.
    X = df[features].copy()
    pressure = X['AtmosphericPressure']

    # The ``.str`` accessor only exists for text columns, so strip the 'mb'
    # suffix only when the column was not already parsed as numbers.
    if not pd.api.types.is_numeric_dtype(pressure):
        pressure = (
            pressure.astype(str)
            .str.replace('mb', '', regex=False)
            .str.strip()
        )
    # Coerce instead of ``astype(float)`` so that empty strings and stray text
    # become NaN rather than raising ValueError.
    X['AtmosphericPressure'] = pd.to_numeric(pressure, errors='coerce')

    # Convert all columns to numeric, replacing non-numeric values with NaN
    X = X.apply(pd.to_numeric, errors='coerce')

    # Give the target the same treatment so downstream NaN filtering and model
    # fitting see a numeric series.
    y = pd.to_numeric(df['WaveHeight'], errors='coerce')

    return X, y

In [5]:
# Model training and evaluation
def train_and_evaluate_models(X, y, estimators=None, test_size=0.2, random_state=42):
    """Fit every candidate regressor on a train split and score it on the held-out split.

    Parameters
    ----------
    X : array-like or pandas.DataFrame
        Feature matrix; passed to ``train_test_split`` and ``StandardScaler``.
    y : array-like or pandas.Series
        Target values aligned row-for-row with ``X``.
    estimators : dict, optional
        Mapping of display name -> estimator exposing ``fit`` / ``predict``.
        When omitted, the module-level ``models`` registry is used.
    test_size : float, optional
        Forwarded to ``train_test_split`` (default 0.2).
    random_state : int, optional
        Seed forwarded to ``train_test_split`` (default 42).

    Returns
    -------
    dict
        ``{name: {'MAE': ..., 'RMSE': ..., 'R-squared': ...}}`` for every
        estimator, computed on the test split.

    Notes
    -----
    The estimators are fitted in place, and a short report is printed for each.
    """
    if estimators is None:
        # Looked up at call time so edits to the registry cell are picked up.
        estimators = models

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # The scaler is fitted on the training rows only and then reused for the
    # test rows, so no test-set statistics feed into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    results = {}

    for name, model in estimators.items():
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        mae = mean_absolute_error(y_test, y_pred)
        # RMSE is taken as the square root of the mean squared error.
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        r2 = r2_score(y_test, y_pred)

        results[name] = {'MAE': mae, 'RMSE': rmse, 'R-squared': r2}

        print(f"\n{name} Results:")
        print(f"MAE: {mae:.2f}")
        print(f"RMSE: {rmse:.2f}")
        print(f"R-squared: {r2:.2f}")

    return results

In [None]:
# Input CSV, resolved relative to the notebook's working directory.
file_path = "wavedata.csv"

df = load_and_preprocess_data(file_path)
X, y = prepare_features(df)

# Keep only the rows in which every feature and the target are present.
mask = X.notna().all(axis=1) & y.notna()
X, y = X.loc[mask], y.loc[mask]

results = train_and_evaluate_models(X, y)

In [None]:
"""
Linear Regression Results:
MAE: 0.53
RMSE: 0.73
R-squared: 0.58

Ridge Regression Results:
MAE: 0.53
RMSE: 0.73
R-squared: 0.58

Lasso Regression Results:
MAE: 0.84
RMSE: 1.13
R-squared: -0.00

ElasticNet Results:
MAE: 0.72
RMSE: 0.97
R-squared: 0.25

Decision Tree Results:
MAE: 0.39
RMSE: 0.66
R-squared: 0.66

Extra Trees Results:
MAE: 0.32
RMSE: 0.48
R-squared: 0.82

Gradient Boosting Results:
MAE: 0.45
RMSE: 0.62
R-squared: 0.70

AdaBoost Results:
MAE: 0.87
RMSE: 0.99
R-squared: 0.24

SVR Results:
MAE: 0.43
RMSE: 0.65
R-squared: 0.67

Neural Network Results:
MAE: 0.44
RMSE: 0.60
R-squared: 0.71    

"""