In [1]:
from sklearn.naive_bayes import GaussianNB

# Training data
X = [
    [1, 1],
    [1, 0],
    [0, 1],
    [0, 0]
]
y = [1, 1, 0, 0]

# Train the model
model = GaussianNB()
model.fit(X, y)

# Predict new email
new_email = [[1, 1], [0, 0], [0, 1]]
prediction = model.predict(new_email)

print("Prediction:", "Spam" if prediction[0] == 1 else "Not Spam")


Prediction: Spam


In [2]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Training data (emails as text)
emails = [
    "You win a free prize today",   # spam
    "Win money now",                # spam
    "Meeting schedule for tomorrow",# not spam
    "Free lunch at the office"      # not spam
]
labels = [1, 1, 0, 0]  # 1 = spam, 0 = not spam

# Step 1: Convert text into word counts
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(emails)

# Step 2: Train Naïve Bayes
model = MultinomialNB()
model.fit(X, labels)

# Step 3: Predict new email
new_email = ["Win a free vacation"]
X_new = vectorizer.transform(new_email)
prediction = model.predict(X_new)

print("Prediction:", "Spam" if prediction[0] == 1 else "Not Spam")

Prediction: Spam


In [3]:
import numpy as np
from sklearn.linear_model import LogisticRegression

# Data
X = np.array([[1], [2], [3], [4], [5], [6], [7], [8]])
y = np.array([0, 0, 0, 1, 1, 1, 1, 1])

# Create and train model
model = LogisticRegression()
model.fit(X, y)

# Predict for new data
test_hours = np.array([[3.5], [5.5]])
predictions = model.predict(test_hours)
probabilities = model.predict_proba(test_hours)

print("Predictions:", predictions)
print("Probabilities:\n", probabilities)


Predictions: [1 1]
Probabilities:
 [[0.49321186 0.50678814]
 [0.08794646 0.91205354]]


In [4]:
import numpy as np
from sklearn.linear_model import LogisticRegression

# Data
X = np.array([[1], [2], [3], [4], [5], [6], [7], [8]])
y = np.array([0, 0, 0, 1, 1, 1, 1, 1])

# Train a model
model = LogisticRegression()
model.fit(X, y)

# See learned values
print("β0 =", model.intercept_[0])
print("β1 =", model.coef_[0][0])

# Predict for new student
print(model.predict([[3.5]]))
print(model.predict_proba([[3.5]]))


β0 = -4.018524277176014
β1 = 1.1559081460420317
[1]
[[0.49321186 0.50678814]]


In [5]:
from sklearn.svm import SVC
import numpy as np

# 1) DATA
# X = [study hours, sleep hours]
X = np.array([[1,4],[2,5],[3,5],[6,6],[7,7],[8,6]])

# y = labels (0 = FAIL, 1 = PASS)
y = np.array([0,0,0,1,1,1])

# 2) CREATE SVM MODEL
model = SVC(kernel='linear')  # linear boundary

# 3) TRAIN
model.fit(X, y)

# 4) PREDICT NEW STUDENT
new_student = [[6,5]]  # 6 hours study, 5 hours sleep
prediction = model.predict(new_student)

print("Prediction:", prediction)


Prediction: [1]


In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# 1) Create Data
data = {
    "study_hours": [1,3,6,8],
    "sleep_hours": [4,5,6, np.nan],
    "gender": ["Male", "Female", "Male", "Female"],
    "result": ["Fail", "Fail", "Pass", "Pass"]
}
df = pd.DataFrame(data)

# 2) Handle Missing Values (fill mean)
df["sleep_hours"].fillna(df["sleep_hours"].mean(), inplace=True)

# 3) Encode Text (Male/Female → numbers)
encoder = LabelEncoder()
df["gender"] = encoder.fit_transform(df["gender"])
df["result"] = encoder.fit_transform(df["result"])  # Pass/Fail

# 4) Separate X and y
X = df[["study_hours", "sleep_hours", "gender"]]
y = df["result"]

# 5) Scale X (make values equal range)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 6) Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25)

df, X_train, X_test, y_train, y_test


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["sleep_hours"].fillna(df["sleep_hours"].mean(), inplace=True)


(   study_hours  sleep_hours  gender  result
 0            1          4.0       1       0
 1            3          5.0       0       0
 2            6          6.0       1       1
 3            8          5.0       0       1,
 array([[ 0.55708601,  1.41421356,  1.        ],
        [-1.29986737, -1.41421356,  1.        ],
        [-0.55708601,  0.        , -1.        ]]),
 array([[ 1.29986737,  0.        , -1.        ]]),
 2    1
 0    0
 1    0
 Name: result, dtype: int64,
 3    1
 Name: result, dtype: int64)

In [8]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Dataset
X = np.array([[1],[2],[3],[4],[5]])   # Hours studied
y = np.array([35,50,65,70,80])        # Exam scores

# Model
model = LinearRegression()

# Train
model.fit(X, y)

# Predict
new_student = [[6]]  # Study for 6 hours
prediction = model.predict(new_student)

print("Predicted Score:", prediction)

Predicted Score: [93.]
