In [4]:
import pandas as pd

df_G23AI2132 = pd.read_csv('/content/sample_data/boston.csv')
threshold = 10
discrete_vars = [col for col in df_G23AI2132.columns if df_G23AI2132[col].nunique() < threshold]
continuous_vars = [col for col in df_G23AI2132.columns if df_G23AI2132[col].nunique() >= threshold]
print("Discrete Variables:", discrete_vars)
print("Continuous Variables:", continuous_vars)


Discrete Variables: ['CHAS', 'RAD']
Continuous Variables: ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PT', 'B', 'LSTAT', 'MV']


In [5]:
descriptive_stats = df_G23AI2132.describe()
print(descriptive_stats)

             CRIM          ZN       INDUS        CHAS         NOX          RM  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean     3.613524   11.363636   11.136779    0.069170    0.554695    6.284634   
std      8.601545   23.322453    6.860353    0.253994    0.115878    0.702617   
min      0.006320    0.000000    0.460000    0.000000    0.385000    3.561000   
25%      0.082045    0.000000    5.190000    0.000000    0.449000    5.885500   
50%      0.256510    0.000000    9.690000    0.000000    0.538000    6.208500   
75%      3.677083   12.500000   18.100000    0.000000    0.624000    6.623500   
max     88.976196  100.000000   27.740000    1.000000    0.871000    8.780000   

              AGE         DIS         RAD         TAX          PT           B  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean    68.574901    3.795043    9.549407  408.237154   18.455534  356.674030   
std     28.148862    2.1057

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

X = df_G23AI2132.drop('MV', axis=1)
y = df_G23AI2132['MV']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse}, R²: {r2}")


MSE: 24.291117338821863, R²: 0.6687595287571355


In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

sgd = SGDRegressor()
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)

r2 = r2_score(y_test, y_pred)
print(f"R²: {r2}")


R²: 0.6615632352287236


In [9]:
import numpy as np
corr_matrix = df_G23AI2132.corr().abs()
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
X_reduced = X.drop(columns=to_drop)

In [10]:
from sklearn.decomposition import PCA

for n_components in range(1, X.shape[1] + 1):
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X_scaled)
    X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
    lin_reg = LinearRegression()
    lin_reg.fit(X_train, y_train)
    y_pred_lin = lin_reg.predict(X_test)
    r2_lin = r2_score(y_test, y_pred_lin)
    sgd = SGDRegressor()
    sgd.fit(X_train, y_train)
    y_pred_sgd = sgd.predict(X_test)
    r2_sgd = r2_score(y_test, y_pred_sgd)

    print(f"PCA Components: {n_components}, Linear R²: {r2_lin}, SGD R²: {r2_sgd}")


PCA Components: 1, Linear R²: 0.4239092685480432, SGD R²: 0.4220678734321405
PCA Components: 2, Linear R²: 0.3795979350082046, SGD R²: 0.3861560208635746
PCA Components: 3, Linear R²: 0.605487220772448, SGD R²: 0.6071428165886634
PCA Components: 4, Linear R²: 0.6192325627434228, SGD R²: 0.6202965381389948
PCA Components: 5, Linear R²: 0.5933694801950906, SGD R²: 0.5909950213689776
PCA Components: 6, Linear R²: 0.595330009387335, SGD R²: 0.5940205764273949
PCA Components: 7, Linear R²: 0.5952775227347431, SGD R²: 0.5947061418045803
PCA Components: 8, Linear R²: 0.598214066690764, SGD R²: 0.5977918910550548
PCA Components: 9, Linear R²: 0.5975892820168489, SGD R²: 0.599207621290287
PCA Components: 10, Linear R²: 0.6079476963014818, SGD R²: 0.607284834745919
PCA Components: 11, Linear R²: 0.6177559772042688, SGD R²: 0.617463347696927
PCA Components: 12, Linear R²: 0.6464748427512272, SGD R²: 0.6469863257650375
PCA Components: 13, Linear R²: 0.6687595287571346, SGD R²: 0.6634536947394749


In [11]:
X_encoded = pd.get_dummies(X, drop_first=True)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
print(f"Linear Regression with One-Hot Encoding R²: {r2}")


Linear Regression with One-Hot Encoding R²: 0.6687595287571355


In [14]:
pip3 install optuna

SyntaxError: invalid syntax (<ipython-input-14-5f2ac9acce01>, line 1)

In [16]:
!pip install optuna
import optuna
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

def objective(trial):
    alpha = trial.suggest_loguniform('alpha', 1e-4, 1e4)
    model = Ridge(alpha=alpha)
    score = cross_val_score(model, X_scaled, y, cv=5, scoring='r2').mean()
    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

print(f"Best hyperparameters: {study.best_params}")
print(f"Best R²: {study.best_value}")


Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

[I 2024-11-22 17:57:57,657] A new study created in memory with name: no-name-e481604b-3310-4385-81f6-0584528e3939
  alpha = trial.suggest_loguniform('alpha', 1e-4, 1e4)
[I 2024-11-22 17:57:57,682] Trial 0 finished with value: 0.35327679585799493 and parameters: {'alpha': 0.00011572115345213155}. Best is trial 0 with value: 0.35327679585799493.
  alpha = trial.suggest_loguniform('alpha', 1e-4, 1e4)
[I 2024-11-22 17:57:57,706] Trial 1 finished with value: 0.37320658056455824 and parameters: {'alpha': 2.921571250124511}. Best is trial 1 with value: 0.37320658056455824.
  alpha = trial.suggest_loguniform('alpha', 1e-4, 1e4)
[I 2024-11-22 17:57:57,733] Trial 2 finished with value: 0.462854208160193 and parameters: {'alpha': 180.4796070292148}. Best is trial 2 with value: 0.462854208160193.
  alpha = trial.suggest_loguniform('alpha', 1e-4, 1e4)
[I 2024-11-22 17:57:57,759] Trial 3 finished with value: 0.3532812548093539 and parameters: {'alpha': 0.0006905013274453881}. Best is trial 2 with va

Best hyperparameters: {'alpha': 95.91417412335704}
Best R²: 0.47612535967076913


In [20]:
!pip install flask
!pip install pyngrok
!pip install joblib
from flask import Flask, request, render_template_string
from pyngrok import ngrok
import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import threading
ngrok.set_auth_token("2pDRNAPZ3xiG6GgsyFcneOSBw4M_5f4JWHdqWB4jb7gVdCod5")
df_G23AI2132 = pd.read_csv('/content/sample_data/boston.csv')

X = df_G23AI2132.drop('MV', axis=1)
y = df_G23AI2132['MV']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)
joblib.dump(model, 'boston_model.pkl')
app = Flask(__name__)
model = joblib.load('boston_model.pkl')
html = """
<!DOCTYPE html>
<html>
<head>
    <title>Boston MV Prediction</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; }
        h1 { color: #333; }
        form { margin-bottom: 20px; }
        label { display: inline-block; width: 150px; margin-bottom: 10px; }
        input[type="text"] { width: 200px; padding: 5px; }
        input[type="submit"] { padding: 10px 20px; }
        .result { margin-top: 20px; font-size: 1.2em; color: green; }
    </style>
</head>
<body>
    <h1>Predict Boston MV</h1>
    <form action="/predict" method="post">
        {% for feature in features %}
            <label for="{{feature}}">{{feature}}:</label>
            <input type="text" id="{{feature}}" name="{{feature}}" required><br><br>
        {% endfor %}
        <input type="submit" value="Predict">
    </form>
    {% if prediction %}
        <div class="result">
            <h2>Predicted MV: {{ prediction }}</h2>
        </div>
    {% endif %}
</body>
</html>
"""

feature_names = X.columns.tolist()
@app.route('/')
def home():
    return render_template_string(html, features=feature_names)
@app.route('/predict', methods=['POST'])
def predict():
    try:
        input_features = []
        for feature in feature_names:
            value = float(request.form[feature])
            input_features.append(value)

        input_df = pd.DataFrame([input_features], columns=feature_names)

        prediction = model.predict(input_df)[0]
        return render_template_string(html, features=feature_names, prediction=round(prediction, 2))
    except Exception as e:
        return f"An error occurred: {e}"

def run_app():
    app.run(host='0.0.0.0', port=5000)

public_url = ngrok.connect(5000)
print(f"Public URL: {public_url}")
thread = threading.Thread(target=run_app)
thread.start()


Traceback (most recent call last):
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 674, in _load_unlocked
  File "<frozen importlib._bootstrap>", line 577, in module_from_spec
  File "<frozen importlib._bootstrap>", line 555, in _init_module_attrs
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/bin/pip3", line 5, in <module>
    from pip._internal.cli.main import main
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/main.py", line 11, in <module>
    from pip._internal.cli.autocompletion import autocomplete
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/autocompletion.py", line 10, in <module>
    from pip._internal.cli.main_parser import create_main_parser
  File "/usr/local/lib/python3.10/dist-packages/pip/_interna