<a href="https://colab.research.google.com/github/ayushiiii28/Manufacturing_Prediction/blob/main/smd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score
import joblib

# Load dataset
def load_dataset(file_path):
    data = pd.read_csv("/content/smd.csv")
    return data

# Train model
def train_model(data_path, model_type="logistic"):
    # Load the data
    data = pd.read_csv(content/synthetic_manufacturing_data.csv)

    # Define features and target
    X = data[["Temperature", "Run_Time"]]
    y = data["Downtime_Flag"]

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Select the model
    if model_type == "logistic":
        model = LogisticRegression()
    elif model_type == "decision_tree":
        model = DecisionTreeClassifier()
    else:
        raise ValueError("Invalid model_type. Choose 'logistic' or 'decision_tree'.")

    # Train the model
    model.fit(X_train, y_train)

    # Test the model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, pos_label=1)  # Assuming 1 means 'Downtime'

    # Save the model
    joblib.dump(model, "trained_model.pkl")
    return {"accuracy": accuracy, "f1_score": f1}




In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from imblearn.over_sampling import SMOTE
import joblib

def load_dataset(file_path):
    try:
        print(f"Loading dataset from: {file_path}")
        data = pd.read_csv("/content/smd.csv")
        print("Dataset loaded successfully!")
        return data
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None

def preprocess_data(data):
    required_columns = {"Temperature", "Run_Time", "Downtime_Flag"}
    if not required_columns.issubset(data.columns):
        raise ValueError(f"Dataset must contain columns: {required_columns}")

    X = data[["Temperature", "Run_Time"]]
    y = data["Downtime_Flag"]

    # Handle class imbalance
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)

    # Feature scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_resampled)

    return X_scaled, y_resampled, scaler

def train_logistic_regression(X_train, y_train):
    param_grid = {'C': [0.01, 0.1, 1, 10, 100]}
    grid = GridSearchCV(LogisticRegression(), param_grid, cv=5, scoring='accuracy')
    grid.fit(X_train, y_train)
    return grid.best_estimator_, grid.best_params_

def train_decision_tree(X_train, y_train):
    param_grid = {
        'max_depth': [3, 5, 10, None],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 5]
    }
    grid = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=5, scoring='accuracy')
    grid.fit(X_train, y_train)
    return grid.best_estimator_, grid.best_params_

def train_random_forest(X_train, y_train):
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [5, 10, None],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 5]
    }
    grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='accuracy')
    grid.fit(X_train, y_train)
    return grid.best_estimator_, grid.best_params_

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, f1

def main():
    # Load dataset
    file_path = "synthetic_manufacturing_data.csv"  # Replace with your dataset path
    data = load_dataset(file_path)
    if data is None:
        return

    # Preprocess data
    X, y, scaler = preprocess_data(data)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train and evaluate Logistic Regression
    log_model, log_params = train_logistic_regression(X_train, y_train)
    log_accuracy, log_f1 = evaluate_model(log_model, X_test, y_test)
    print("Logistic Regression Results:", {
        'accuracy': log_accuracy,
        'f1_score': log_f1,
        'best_params': log_params
    })

    # Save Logistic Regression model
    joblib.dump(log_model, "logistic_model.pkl")

    # Train and evaluate Decision Tree
    tree_model, tree_params = train_decision_tree(X_train, y_train)
    tree_accuracy, tree_f1 = evaluate_model(tree_model, X_test, y_test)
    print("Decision Tree Results:", {
        'accuracy': tree_accuracy,
        'f1_score': tree_f1,
        'best_params': tree_params
    })

    # Save Decision Tree model
    joblib.dump(tree_model, "decision_tree_model.pkl")

    # Train and evaluate Random Forest
    rf_model, rf_params = train_random_forest(X_train, y_train)
    rf_accuracy, rf_f1 = evaluate_model(rf_model, X_test, y_test)
    print("Random Forest Results:", {
        'accuracy': rf_accuracy,
        'f1_score': rf_f1,
        'best_params': rf_params
    })

    # Save Random Forest model
    joblib.dump(rf_model, "random_forest_model.pkl")

if __name__ == "__main__":
    main()


Loading dataset from: synthetic_manufacturing_data.csv
Dataset loaded successfully!
Logistic Regression Results: {'accuracy': 0.4878048780487805, 'f1_score': 0.43243243243243246, 'best_params': {'C': 10}}
Decision Tree Results: {'accuracy': 0.5121951219512195, 'f1_score': 0.5238095238095238, 'best_params': {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 10}}
Random Forest Results: {'accuracy': 0.5365853658536586, 'f1_score': 0.4864864864864865, 'best_params': {'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 50}}
