In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

def log_regression(start_year: int, end_year: int, pct_train: float) -> None:
    """Fit a log-linear trend to synthetic Dorena case counts and plot it.

    For every year in ``[start_year, end_year]`` a synthetic case count
    ``100 * 2.4 ** (year - 1900)`` plus standard-normal noise is generated.
    A linear regression of ``log(count)`` on the 1900-based year offset is
    fitted on the chronologically first part of the series and scored on the
    remainder. The data and the fitted line are shown with matplotlib and
    both scores are printed.

    The noise is drawn from NumPy's global random state, so results differ
    between calls unless the caller seeds it.

    Args:
        start_year: First calendar year of the series (inclusive).
        end_year: Last calendar year of the series (inclusive).
        pct_train: Fraction of the years (e.g. ``0.8``, not ``80``) that goes
            into the training set. ``int(pct_train * n_years)`` must leave at
            least one year in each of the training and testing sets.

    Raises:
        ValueError: If the split would leave the training or testing set
            empty (this includes negative ``pct_train`` and an empty year
            range), or if the synthetic counts are not strictly positive and
            finite, in which case their logarithm is undefined.
    """
    years = np.arange(start_year, end_year + 1)
    n_years = len(years)

    # Validate the split before doing any work. A negative size would
    # otherwise slice from the *end* of the arrays and silently train on the
    # wrong share of the data; an empty split only fails deep inside the
    # regression library with an unrelated message.
    train_size = int(pct_train * n_years)
    if not 0 < train_size < n_years:
        raise ValueError(
            f"pct_train={pct_train!r} gives a training size of {train_size} "
            f"for {n_years} year(s); the training and testing sets must "
            "both be non-empty"
        )

    # Regress on the offset from 1900 rather than the raw year.
    time = years - 1900

    # Dummy exponential case counts with additive Gaussian noise. Floating
    # point warnings are silenced here because the explicit finiteness check
    # below reports the same problems with a clearer message.
    with np.errstate(over="ignore", invalid="ignore", divide="ignore"):
        d = 100 * (2.4) ** time + np.random.normal(0, 1, n_years)
        log_d = np.log(d)

    # Far before 1900 the noise dominates and counts can be <= 0; far after
    # 1900 the exponential overflows. Either way the log is unusable.
    if not np.isfinite(log_d).all():
        raise ValueError(
            "synthetic case counts must be positive and finite to take "
            f"their logarithm; the year range {start_year}-{end_year} is "
            "too far from 1900"
        )

    # Chronological split: earliest years train, latest years test.
    features = time.reshape(-1, 1)
    train_features = features[:train_size]
    test_features = features[train_size:]
    train_log_d = log_d[:train_size]
    test_log_d = log_d[train_size:]

    reg = LinearRegression().fit(train_features, train_log_d)

    # ``score`` is the coefficient of determination (R^2) of the fit; the
    # printed label below keeps the original "accuracy" wording.
    train_accuracy = reg.score(train_features, train_log_d)
    test_accuracy = reg.score(test_features, test_log_d)

    # Plot against calendar years so the x-axis matches its "Year" label.
    plt.scatter(years[:train_size], train_log_d, label="Training Data")
    plt.scatter(years[train_size:], test_log_d, label="Testing Data")
    plt.plot(years, reg.predict(features), color='red', label="Regression Line")
    plt.xlabel("Year")
    plt.ylabel("Log of Dorena cases")
    plt.title("Linear Regression")
    plt.legend()
    plt.show()

    print("Training accuracy:", train_accuracy)
    print("Testing accuracy:", test_accuracy)


ModuleNotFoundError: No module named 'numpy'