In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
%matplotlib inline

# Load the data
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillsNetwork/labs/Module%202/data/FuelConsumptionCo2.csv"
df = pd.read_csv(url)

# Select features
cdf = df[['ENGINESIZE','CYLINDERS','FUELCONSUMPTION_COMB','CO2EMISSIONS']]

# Define independent (X) and dependent (y) variables
X = np.asarray(cdf[['ENGINESIZE']])
y = np.asarray(cdf['CO2EMISSIONS'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)

# Output coefficients
print('Coefficients: ', regr.coef_)
print('Intercept: ', regr.intercept_)

# Make predictions
y_pred = regr.predict(X_test)

# Evaluation
print("Mean absolute error: %.2f" % mean_absolute_error(y_test, y_pred))
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
print("Root mean squared error: %.2f" % np.sqrt(mean_squared_error(y_test, y_pred)))
print("R2-score: %.2f" % r2_score(y_test, y_pred))

Coefficients:  [38.99297872]
Intercept:  126.28970217408724
Mean absolute error: 24.10
Mean squared error: 985.94
Root mean squared error: 31.40
R2-score: 0.76


In [2]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Load the data
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillsNetwork/labs/Module%202/data/FuelConsumptionCo2.csv"
df = pd.read_csv(url)

# Preprocessing: Drop unnecessary columns
df = df.drop(['CYLINDERS', 'FUELCONSUMPTION_CITY', 'FUELCONSUMPTION_HWY','FUELCONSUMPTION_COMB'], axis=1)

# Select multiple features for X
X = np.asarray(df[['ENGINESIZE','FUELCONSUMPTION_COMB_MPG']])
y = np.asarray(df[['CO2EMISSIONS']])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the multiple linear regression model
regressor = linear_model.LinearRegression()
regressor.fit(X_train, y_train)

# Output parameters
print('Coefficients: ', regressor.coef_)
print('Intercept: ', regressor.intercept_)

# Predict and evaluate
y_pred = regressor.predict(X_test)

Coefficients:  [[17.8581369  -5.01502179]]
Intercept:  [329.1363967]


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import jaccard_score, confusion_matrix, classification_report
%matplotlib inline

# Load the data
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillsNetwork/labs/Module%203/data/ChurnData.csv"
churn_df = pd.read_csv(url)

# Preprocessing
churn_df = churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip', 'callcard', 'wireless', 'churn']]
churn_df['churn'] = churn_df['churn'].astype('int')

# Define features and target
X = np.asarray(churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip']])
y = np.asarray(churn_df['churn'])

# Normalize the dataset
X = preprocessing.StandardScaler().fit(X).transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# Build and train the Logistic Regression model
# 'liblinear' is a good solver for small datasets
LR = LogisticRegression(C=0.01, solver='liblinear').fit(X_train, y_train)

# Predict class labels
yhat = LR.predict(X_test)

# Predict probabilities
yhat_prob = LR.predict_proba(X_test)

# Evaluation using Jaccard Score
print("Jaccard Score: %.2f" % jaccard_score(y_test, yhat, pos_label=0))

# Classification Report
print(classification_report(y_test, yhat))

Jaccard Score: 0.71
              precision    recall  f1-score   support

           0       0.73      0.96      0.83        25
           1       0.86      0.40      0.55        15

    accuracy                           0.75        40
   macro avg       0.79      0.68      0.69        40
weighted avg       0.78      0.75      0.72        40

