In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import os

In [2]:
def train_model(data_path='Chapter2_Shale_Gas_Wells_DataSet.csv',
                model_path='linear_regression_model.pkl',
                scaler_path='scaler.pkl',
                test_size=0.3,
                random_state=42):
    """Fit a scaled linear regression on a CSV dataset and persist the artifacts.

    The first 13 columns of the CSV are used as features and the
    second-to-last column as the target. Features are standardized with a
    ``StandardScaler`` fitted on the training split only; the fitted model and
    scaler are written with ``joblib.dump`` so they can be reloaded later.

    Args:
        data_path: Path of the CSV file to read.
        model_path: Destination file for the fitted ``LinearRegression``.
        scaler_path: Destination file for the fitted ``StandardScaler``.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        dict: ``{"mse", "mae", "r2"}`` computed on the held-out split, or
        ``{"error": <message>}`` if anything fails. Exceptions are never
        propagated to the caller; they are printed and reported in the dict.
    """
    n_features = 13      # leading columns used as predictors
    target_offset = -2   # target is the second-to-last column

    try:
        # Load the dataset
        df = pd.read_csv(data_path)

        # Positional selection only makes sense when the target column lies
        # outside the feature slice; otherwise the target would be fed to the
        # model as one of its own inputs and the metrics would be meaningless.
        n_columns = df.shape[1]
        if n_columns + target_offset < n_features:
            raise ValueError(
                f"Dataset has {n_columns} columns; at least "
                f"{n_features - target_offset} are required so the target column "
                f"is not also one of the {n_features} feature columns."
            )

        # Separate features (X) and target (Z)
        X = df.iloc[:, :n_features]
        Z = df.iloc[:, target_offset]

        # Split the data into training and testing sets
        X_train, X_test, Z_train, Z_test = train_test_split(
            X, Z, test_size=test_size, random_state=random_state
        )

        # Fit the scaler on the training split only so that no statistics
        # from the test split influence the transformation.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Create and train the model
        model = LinearRegression()
        model.fit(X_train_scaled, Z_train)

        # Save the model and scaler for later use
        joblib.dump(model, model_path)
        joblib.dump(scaler, scaler_path)

        # Evaluate the model on the held-out split
        Z_pred = model.predict(X_test_scaled)
        mse = mean_squared_error(Z_test, Z_pred)
        mae = mean_absolute_error(Z_test, Z_pred)
        r2 = r2_score(Z_test, Z_pred)

        print(f"Training Complete\nMSE: {mse}, MAE: {mae}, R2: {r2}")
        return {"mse": mse, "mae": mae, "r2": r2}
    except FileNotFoundError:
        print("Error: Dataset file not found.")
        return {"error": "Dataset file not found"}
    except Exception as e:
        print(f"An error occurred during training: {e}")
        return {"error": str(e)}


In [3]:
# Run the training pipeline. train_model() fits the regression, writes the
# model and scaler pickles, prints the evaluation metrics itself, and returns
# them as a dict (or a dict with an "error" key if training failed).
metrics = train_model()
print(metrics)


Training Complete
MSE: 2.4961007206907646, MAE: 1.0897426619568948, R2: 0.6982202815810612
{'mse': 2.4961007206907646, 'mae': 1.0897426619568948, 'r2': 0.6982202815810612}


In [4]:
def predict(input_data, model_path='linear_regression_model.pkl', scaler_path='scaler.pkl'):
    """Predict the target for a single sample using the saved model and scaler.

    Args:
        input_data: Flat sequence of feature values for one sample; it is
            reshaped to a single row before scaling.
        model_path: File holding the fitted model written by ``joblib.dump``.
        scaler_path: File holding the fitted scaler written by ``joblib.dump``.

    Returns:
        The first element of the model's prediction on success. On failure a
        message string is returned instead of raising: a fixed "not found"
        message when either artifact file is missing, otherwise
        ``"Error: <details>"``.
    """
    try:
        # Check if the model and scaler files exist before loading
        if not os.path.exists(model_path) or not os.path.exists(scaler_path):
            return "Model or scaler file not found. Please train the model first."

        # NOTE(security): joblib.load unpickles the files, which can execute
        # arbitrary code. Only point these paths at artifacts you produced.
        model = joblib.load(model_path)
        scaler = joblib.load(scaler_path)

        # Reshape to one row: a single sample with all of its features
        features = np.array(input_data).reshape(1, -1)

        # A scaler fitted on a DataFrame remembers its column names and warns
        # when later handed a bare array. Re-attach the names when the width
        # matches; on a mismatch leave the array untouched so the scaler
        # reports the feature-count problem itself.
        feature_names = getattr(scaler, "feature_names_in_", None)
        if feature_names is not None and features.shape[1] == len(feature_names):
            features = pd.DataFrame(features, columns=feature_names)

        features_scaled = scaler.transform(features)

        # Make prediction
        prediction = model.predict(features_scaled)
        return prediction[0]  # Return the predicted value
    except Exception as e:
        print(f"An error occurred during prediction: {e}")
        return f"Error: {str(e)}"


In [5]:
# Test the prediction function with sample input data
# (13 values, one per feature column, in the order used for training)
sample_input_data = [
    1000, 5, 1200, 30, 150, 8000, 25, 10, 5000, 30, 40, 0.8, 20
]

prediction = predict(sample_input_data)
if isinstance(prediction, str):
    # predict() reports failures as a message string rather than raising;
    # show it as-is instead of formatting it as if it were a predicted value.
    print(prediction)
else:
    # NOTE(review): confirm the "Bbl" unit label against the dataset's target column.
    print(f"Predicted EUR: {prediction} Bbl")


Predicted EUR: 10.53509976460328 Bbl


