<a href="https://colab.research.google.com/github/ganashreecs22/ml_lab/blob/main/logistic_linear_multilinear.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
#linear regression
import csv

# Load data from CSV
def load_data(filename):
    X = []
    Y = []
    with open(filename, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            X.append(float(row['YearsExperience']))
            Y.append(float(row['Salary']))
    return X, Y

# Mean
def mean(values):
    return sum(values) / len(values)

# Variance
def variance(values, mean_val):
    return sum((x - mean_val) ** 2 for x in values)

# Covariance
def covariance(X, mean_x, Y, mean_y):
    return sum((X[i] - mean_x) * (Y[i] - mean_y) for i in range(len(X)))

# Coefficients (slope and intercept)
def coefficients(X, Y):
    mean_x = mean(X)
    mean_y = mean(Y)
    covar = covariance(X, mean_x, Y, mean_y)
    var_x = variance(X, mean_x)
    m = covar / var_x
    b = mean_y - m * mean_x
    return m, b

# Predict
def predict(X, m, b):
    return [m * x + b for x in X]

# Mean Squared Error
def mean_squared_error(actual, predicted):
    return sum((a - p) ** 2 for a, p in zip(actual, predicted)) / len(actual)

# --- MAIN EXECUTION ---

# Load data
X, Y = load_data("/content/Salary_Data.csv")

# Train model
m, b = coefficients(X, Y)
print(f"Learned model: Salary = {m:.2f} * YearsExperience + {b:.2f}")

# Make predictions
predictions = predict(X, m, b)

# Evaluate
mse = mean_squared_error(Y, predictions)
print(f"Mean Squared Error: {mse:.2f}")

# Display predictions
for i in range(len(X)):
    print(f"Years: {X[i]}, Actual Salary: {Y[i]}, Predicted: {predictions[i]:.2f}")


Learned model: Salary = 9449.96 * YearsExperience + 25792.20
Mean Squared Error: 31270951.72
Years: 1.1, Actual Salary: 39343.0, Predicted: 36187.16
Years: 1.3, Actual Salary: 46205.0, Predicted: 38077.15
Years: 1.5, Actual Salary: 37731.0, Predicted: 39967.14
Years: 2.0, Actual Salary: 43525.0, Predicted: 44692.12
Years: 2.2, Actual Salary: 39891.0, Predicted: 46582.12
Years: 2.9, Actual Salary: 56642.0, Predicted: 53197.09
Years: 3.0, Actual Salary: 60150.0, Predicted: 54142.09
Years: 3.2, Actual Salary: 54445.0, Predicted: 56032.08
Years: 3.2, Actual Salary: 64445.0, Predicted: 56032.08
Years: 3.7, Actual Salary: 57189.0, Predicted: 60757.06
Years: 3.9, Actual Salary: 63218.0, Predicted: 62647.05
Years: 4.0, Actual Salary: 55794.0, Predicted: 63592.05
Years: 4.0, Actual Salary: 56957.0, Predicted: 63592.05
Years: 4.1, Actual Salary: 57081.0, Predicted: 64537.05
Years: 4.5, Actual Salary: 61111.0, Predicted: 68317.03
Years: 4.9, Actual Salary: 67938.0, Predicted: 72097.02
Years: 5.1,

In [5]:
# Multiple Linear Regression from Scratch with Categorical Handling

def load_csv(filename):
    with open(filename, 'r') as file:
        lines = file.readlines()
        headers = lines[0].strip().split(',')
        data = []
        for line in lines[1:]:
            values = line.strip().split(',')
            row = []
            for value in values:
                # Convert categorical Yes/No to 1/0
                if value.lower() == 'yes':
                    row.append(1.0)
                elif value.lower() == 'no':
                    row.append(0.0)
                else:
                    row.append(float(value))
            data.append(row)
    return headers, data

def mean(values):
    return sum(values) / len(values)

def variance(values, mean_val):
    return sum((x - mean_val) ** 2 for x in values)

def covariance(x, x_mean, y, y_mean):
    return sum((x[i] - x_mean) * (y[i] - y_mean) for i in range(len(x)))

def coefficients(dataset):
    X = [row[:-1] for row in dataset]
    Y = [row[-1] for row in dataset]
    means = [mean(col) for col in zip(*X)]
    y_mean = mean(Y)
    coeffs = []
    for i in range(len(X[0])):
        cov = covariance([row[i] for row in X], means[i], Y, y_mean)
        var = variance([row[i] for row in X], means[i])
        coeffs.append(cov / var if var != 0 else 0)
    intercept = y_mean - sum(coeffs[i] * means[i] for i in range(len(coeffs)))
    return intercept, coeffs

def predict(row, intercept, coeffs):
    return intercept + sum(coeffs[i] * row[i] for i in range(len(coeffs)))

def mean_squared_error(actual, predicted):
    return sum((actual[i] - predicted[i]) ** 2 for i in range(len(actual))) / len(actual)

# MAIN
if __name__ == "__main__":
    headers, data = load_csv("/content/Student_Performance.csv")

    intercept, coeffs = coefficients(data)

    print("Intercept:", intercept)
    print("Coefficients:")
    for i in range(len(coeffs)):
        print(f"{headers[i]}: {coeffs[i]}")

    predictions = [predict(row[:-1], intercept, coeffs) for row in data]
    actual = [row[-1] for row in data]

    mse = mean_squared_error(actual, predictions)
    print("Mean Squared Error:", mse)

    for i in range(len(data)):
        print(f"Actual: {actual[i]}, Predicted: {predictions[i]:.2f}")


Intercept: -34.381635338725914
Coefficients:
Hours Studied: 2.7730628246389557
Previous Scores: 1.0138367015830627
Extracurricular Activities: 0.9423777675793193
Sleep Hours: 0.5449945877537364
Sample Question Papers Practiced: 0.28991782346265094
Mean Squared Error: 4.31274844054749
Actual: 91.0, Predicted: 91.54
Actual: 65.0, Predicted: 62.61
Actual: 45.0, Predicted: 44.85
Actual: 36.0, Predicted: 36.45
Actual: 66.0, Predicted: 66.88
Actual: 61.0, Predicted: 59.66
Actual: 63.0, Predicted: 64.45
Actual: 42.0, Predicted: 38.29
Actual: 61.0, Predicted: 62.49
Actual: 69.0, Predicted: 69.12
Actual: 84.0, Predicted: 83.69
Actual: 73.0, Predicted: 71.75
Actual: 27.0, Predicted: 27.07
Actual: 33.0, Predicted: 32.67
Actual: 68.0, Predicted: 65.71
Actual: 43.0, Predicted: 47.21
Actual: 67.0, Predicted: 68.27
Actual: 70.0, Predicted: 71.74
Actual: 30.0, Predicted: 31.64
Actual: 63.0, Predicted: 59.34
Actual: 71.0, Predicted: 72.75
Actual: 85.0, Predicted: 84.49
Actual: 73.0, Predicted: 72.10
Ac

In [12]:
import random

def load_csv(filename):
    data = []
    with open(filename, 'r') as f:
        lines = f.readlines()
        headers = lines[0].strip().split(',')

        # Columns to exclude (drop non-numeric or irrelevant)
        exclude_cols = ['Risk_MM', 'RainTomorrow', 'Date', 'Location',
                        'WindGustDir', 'WindDir9am', 'WindDir3pm']
        exclude_indices = [headers.index(col) for col in exclude_cols if col in headers]
        target_index = headers.index('RainTomorrow')

        for line in lines[1:]:
            values = line.strip().split(',')

            # Skip if target missing or invalid
            target_val = values[target_index]
            if target_val not in ['Yes', 'No']:
                continue

            row = []

            skip = False
            for i, val in enumerate(values):
                if i in exclude_indices:
                    continue
                if val == '':
                    skip = True
                    break
                try:
                    row.append(float(val))
                except ValueError:
                    skip = True
                    break

            if skip:
                continue

            # Add bias term 1.0 for intercept
            row.insert(0, 1.0)

            # Append label as float
            label = 1.0 if target_val == 'Yes' else 0.0
            row.append(label)
            data.append(row)
    return data

def sigmoid(z):
    import math
    return 1 / (1 + math.exp(-z))

def predict(features, weights):
    z = sum(w * x for w, x in zip(weights, features))
    return sigmoid(z)

def train_logistic_regression(data, lr=0.01, epochs=1000):
    num_features = len(data[0]) - 1
    weights = [0.0] * num_features

    for epoch in range(epochs):
        for row in data:
            features = row[:-1]
            label = row[-1]
            pred = predict(features, weights)
            error = label - pred
            for i in range(num_features):
                weights[i] += lr * error * features[i]
    return weights

def evaluate(data, weights):
    correct = 0
    total = 0
    for row in data:
        features = row[:-1]
        label = row[-1]
        prediction = predict(features, weights)
        predicted_class = 1.0 if prediction >= 0.5 else 0.0
        if predicted_class == label:
            correct += 1
        total += 1
    return correct / total if total > 0 else 0

if __name__ == "__main__":
    path = "/content/weatherAUS.csv"
    data = load_csv(path)

    if not data:
        print("No data loaded. Please check your CSV content.")
    else:
        # Proper random shuffle
        random.shuffle(data)

        # 80-20 train-test split
        split_index = int(0.8 * len(data))
        train_data = data[:split_index]
        test_data = data[split_index:]

        # Train and evaluate
        weights = train_logistic_regression(train_data, lr=0.01, epochs=1000)
        accuracy = evaluate(test_data, weights)
        print("Model Accuracy:", round(accuracy * 100, 2), "%")


No data loaded. Please check your CSV content.
