# Simple Linear Regression with scikit-learn Cheatsheet

### 1. Import Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

### 2. Loading Data

In [None]:
# Read the raw dataset from a CSV file into a DataFrame
data = pd.read_csv('data.csv')

# Separate the response (y) from the predictors (X)
y = data['target']  # Replace with your target column
X = data[['feature1', 'feature2', 'feature3']]  # Replace with your feature columns


### 3. Splitting Data

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


### 4. Creating and Training the Model

In [None]:
# Ordinary least-squares regressor. fit() returns the estimator itself,
# so construction and training can be chained in a single expression.
model = LinearRegression().fit(X_train, y_train)

### 5. Making Predictions

In [None]:
# Use the fitted model to predict the target for the test features
y_pred = model.predict(X_test)

### 6. Evaluating the Model

In [None]:
# Compute both metrics up front, then report them
mse = mean_squared_error(y_test, y_pred)  # Mean Squared Error (MSE): lower is better
r2 = r2_score(y_test, y_pred)  # R-squared (R2): 1.0 is a perfect fit

print('Mean Squared Error:', mse)
print('R-squared:', r2)


### 7. Model Coefficients

In [None]:
# Coefficients (weights) and Intercept
# coef_ holds one learned weight per feature column, in the order the columns were passed to fit()
print('Coefficients:', model.coef_)
# intercept_ is the constant term added to the weighted sum of the features
print('Intercept:', model.intercept_)


# Full Example Code

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset and separate the response from the predictors
data = pd.read_csv('data.csv')
y = data['target']
X = data[['feature1', 'feature2', 'feature3']]

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the regressor and train it in one step (fit() returns the estimator)
model = LinearRegression().fit(X_train, y_train)

# Predict on the test split
y_pred = model.predict(X_test)

# Score the predictions against the true test targets
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('R-squared:', r2)

# Inspect the learned parameters
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)


# Additional Tips

### Feature Scaling: While linear regression doesn't always require feature scaling, it's good practice to scale features when they vary widely in magnitude.

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split only,
# so no information from the test split leaks into the preprocessing
scaler = StandardScaler().fit(X_train)

# Apply the same training-set statistics to both splits.
# NOTE: a model fitted on the unscaled data must be refit on these scaled arrays.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


### Cross-validation: To get a better estimate of the model's performance, consider using cross-validation.

In [None]:
from sklearn.model_selection import cross_val_score

# scikit-learn scorers follow a "greater is better" convention,
# so the MSE of each fold is returned negated
scores = cross_val_score(
    model,
    X,
    y,
    cv=5,
    scoring='neg_mean_squared_error',
)

# Flip the sign back to report the average MSE across the folds
print('Cross-validated MSE:', -scores.mean())
