In [None]:
#apriori algorithm
from itertools import combinations

# Function to generate candidate itemsets of size k
def generate_candidates(itemsets, k):
    """Build size-``k`` candidate itemsets from ``(k - 1)``-itemsets.

    Join step: every unordered pair of itemsets is unioned, and the union is
    kept only if it has exactly ``k`` items.

    Prune step (Apriori / downward-closure property): a candidate is kept
    only if every one of its ``(k - 1)``-item subsets is itself present in
    ``itemsets``. A superset of an itemset that is not in the input cannot be
    frequent, so it is discarded before any support counting is done.

    Args:
        itemsets: Iterable of ``frozenset`` objects, expected to all have
            ``k - 1`` items.
        k: Size of the candidate itemsets to generate.

    Returns:
        A ``set`` of ``frozenset`` candidates, each with exactly ``k`` items.
    """
    # A set gives O(1) membership tests for the prune step and removes
    # duplicate inputs so that each pair is joined only once.
    previous = set(itemsets)
    candidates = set()
    for itemset1, itemset2 in combinations(previous, 2):
        candidate = itemset1 | itemset2
        if len(candidate) != k:
            continue
        # Keep the candidate only if all of its (k-1)-subsets were in the input.
        if all(frozenset(subset) in previous
               for subset in combinations(candidate, k - 1)):
            candidates.add(candidate)
    return candidates

# Function to prune infrequent itemsets
def prune_itemsets(itemsets, k, min_support, transactions):
    """Return the itemsets whose support reaches ``min_support``.

    Support of an itemset is the number of transactions containing all of its
    items divided by the total number of transactions.

    Args:
        itemsets: Iterable of ``frozenset`` candidates to evaluate.
        k: Size of the itemsets; accepted for interface symmetry but not
            used by the computation.
        min_support: Minimum support as a fraction of all transactions.
        transactions: Sized collection of item collections.

    Returns:
        A ``set`` with the candidates whose support is ``>= min_support``.
        Candidates that occur in no transaction are never returned.
    """
    n_transactions = len(transactions)

    # Tally how many transactions contain each candidate; candidates that
    # never occur are left out of the tally altogether.
    hits = {}
    for candidate in itemsets:
        matched = sum(1 for basket in transactions if candidate.issubset(basket))
        if matched:
            hits[candidate] = hits.get(candidate, 0) + matched

    return {
        candidate
        for candidate, matched in hits.items()
        if matched / n_transactions >= min_support
    }

# Apriori algorithm
def apriori(transactions, min_support):
    """Find all frequent itemsets in ``transactions`` with the Apriori algorithm.

    The search proceeds level by level: frequent 1-itemsets are found first,
    then each level ``k`` is built from the frequent itemsets of level
    ``k - 1`` and filtered by support, until a level yields nothing.

    Args:
        transactions: Sized collection of item collections (e.g. a list of
            sets).
        min_support: Minimum support as a fraction of all transactions.

    Returns:
        A list of ``frozenset`` objects: every frequent itemset of every size,
        including the frequent single items. Levels appear in increasing
        size; the order within a level is not specified.
    """
    unique_items = {item for transaction in transactions for item in transaction}
    singletons = [frozenset([item]) for item in unique_items]

    # Level 1: single items must pass the support threshold as well, both so
    # that they are reported and so that infrequent items never seed level 2.
    itemsets = prune_itemsets(singletons, 1, min_support, transactions)
    frequent_itemsets = list(itemsets)

    k = 2
    while itemsets:
        candidates = generate_candidates(itemsets, k)
        itemsets = prune_itemsets(candidates, k, min_support, transactions)
        # An empty level adds nothing here and ends the loop.
        frequent_itemsets.extend(itemsets)
        k += 1
    return frequent_itemsets

# Example usage
if __name__ == "__main__":
    # Toy market-basket data: one tuple of purchased items per transaction,
    # converted to sets below because the algorithm does subset tests.
    baskets = (
        ('milk', 'bread', 'butter'),
        ('milk', 'bread'),
        ('milk', 'butter'),
        ('milk', 'bread', 'jam'),
        ('bread', 'butter'),
        ('bread', 'jam'),
        ('butter', 'jam'),
    )
    transactions = [set(basket) for basket in baskets]

    # Minimum support threshold as a fraction of transactions (adjust as needed)
    min_support = 0.4

    # Mine the frequent itemsets and list them one per line
    frequent_itemsets = apriori(transactions, min_support)
    print("Frequent Itemsets:")
    for itemset in frequent_itemsets:
        print(itemset)

Frequent Itemsets:
frozenset({'bread', 'milk'})


In [None]:
#kmeans algorithm
import numpy as np

def euclidean_distance(point1,point2):
  """Return the Euclidean (L2) distance between two points.

  Both arguments are converted to float arrays before subtracting. This lets
  plain lists/tuples be passed, and prevents unsigned-integer inputs from
  wrapping around on subtraction (e.g. uint8 ``1 - 2`` would give ``255``).

  Args:
    point1: Array-like with the coordinates of the first point.
    point2: Array-like with the coordinates of the second point; must be
      broadcast-compatible with ``point1``.

  Returns:
    The distance as a non-negative float.
  """
  delta = np.asarray(point1, dtype=float) - np.asarray(point2, dtype=float)
  return np.linalg.norm(delta)

def _assign_to_nearest(points, coords, centroids, k):
  """Group ``points`` into ``k`` lists by the nearest of ``centroids``."""
  groups = [[] for _ in range(k)]
  for point, coord in zip(points, coords):
    distances = [np.linalg.norm(coord - centroid) for centroid in centroids]
    groups[int(np.argmin(distances))].append(point)
  return groups

def kmeans(data,k,max_iterations=100,random_state=None):
  """Cluster ``data`` into ``k`` groups with Lloyd's k-means algorithm.

  Initial centroids are ``k`` distinct points drawn at random from ``data``.
  Points are then assigned to their nearest centroid and each centroid is
  moved to the mean of its points, until the centroids stop changing or
  ``max_iterations`` updates have been made. A cluster that ends up empty
  keeps its previous centroid.

  Args:
    data: Array-like of points, one point per row.
    k: Number of clusters; must satisfy ``1 <= k <= len(data)``.
    max_iterations: Maximum number of centroid updates.
    random_state: Optional seed for the initial centroid choice. When
      ``None`` the global ``np.random`` state is used, so ``np.random.seed``
      still controls the result.

  Returns:
    ``(centroids, clusters)``: ``centroids`` is a float ndarray with one row
    per cluster; ``clusters`` is a list of ``k`` lists holding the original
    points assigned to each returned centroid.

  Raises:
    ValueError: If ``k`` is not between 1 and the number of points.
  """
  points = np.asarray(data)
  n_points = len(points)
  if not 1 <= k <= n_points:
    raise ValueError(f"k must be between 1 and {n_points}, got {k}")

  if random_state is None:
    start = np.random.choice(n_points, k, replace=False)
  else:
    start = np.random.default_rng(random_state).choice(n_points, k, replace=False)

  # Distances are computed on a float copy so unsigned-integer data cannot
  # wrap around; the clusters still hold the original, unconverted points.
  coords = points.astype(float)
  centroids = coords[start]

  # Assign before the loop so `clusters` exists even for max_iterations=0,
  # and re-assign after every update so the returned clusters always belong
  # to the returned centroids.
  clusters = _assign_to_nearest(points, coords, centroids, k)
  for _ in range(max_iterations):
    new_centroids = np.array([
      np.mean(members, axis=0) if members else centroids[i]
      for i, members in enumerate(clusters)
    ])
    if np.array_equal(new_centroids, centroids):
      break
    centroids = new_centroids
    clusters = _assign_to_nearest(points, coords, centroids, k)
  return centroids,clusters
# kmeans picks its initial centroids with np.random.choice, so seed NumPy's
# global RNG to make the clustering (and the cluster numbering) repeatable.
np.random.seed(42)
data = np.array([[2, 3], [2, 4], [8, 9], [7, 8], [1, 2], [8, 8], [9, 7], [2, 2]])
# Number of clusters (k=2)
k = 2
centroids, clusters = kmeans(data, k)
# Print the final centroids and clusters
for i in range(k):
  print(f"Cluster {i + 1} - Centroid: {centroids[i]}, Points: {clusters[i]}")

Cluster 1 - Centroid: [8. 8.], Points: [array([8, 9]), array([7, 8]), array([8, 8]), array([9, 7])]
Cluster 2 - Centroid: [1.75 2.75], Points: [array([2, 3]), array([2, 4]), array([1, 2]), array([2, 2])]


In [None]:
import numpy as np

# Given data
air_velocity = np.array([30, 70, 110, 150, 180, 220, 260, 300, 350, 390])
evaporation_coefficient = np.array([0.18, 0.37, 0.35, 0.78, 0.56, 0.75, 1.18, 1.36, 1.17, 1.65])

# Step 1: Calculate the mean values of x and y
x_mean = np.mean(air_velocity)
y_mean = np.mean(evaporation_coefficient)

# Step 2: Calculate b1 (slope) = Sxy / Sxx
numerator = np.sum((air_velocity - x_mean) * (evaporation_coefficient - y_mean))
denominator = np.sum((air_velocity - x_mean) ** 2)
b1 = numerator / denominator

# Step 3: Calculate b0 (intercept)
b0 = y_mean - b1 * x_mean

# Step 4: Create the regression equation
def regression_equation(x):
    """Return the fitted value b0 + b1 * x for a scalar or array x."""
    return b0 + b1 * x

# Step 5: Calculate R-squared value
# R^2 = 1 - SSE / SST, where SSE is the residual (unexplained) sum of squares.
# Subtracting the explained sum of squares (SSR) from 1 instead would give the
# unexplained fraction, i.e. 1 - R^2.
y_predicted = regression_equation(air_velocity)
sse = np.sum((evaporation_coefficient - y_predicted) ** 2)  # residual sum of squares
ssr = np.sum((y_predicted - y_mean) ** 2)  # regression (explained) sum of squares
sst = np.sum((evaporation_coefficient - y_mean) ** 2)  # total sum of squares
r_squared = 1 - (sse / sst)

# Step 6: Find the value of the evaporation coefficient for Air Velocity = 250
air_velocity_new = 250
evaporation_coefficient_predicted = regression_equation(air_velocity_new)

print(f"b1 (slope): {b1}")
print(f"b0 (intercept): {b0}")
print(f"Regression Equation: ŷ = {b0} + {b1}*x")
print(f"R-squared value: {r_squared}")
print(f"Predicted Evaporation Coefficient for Air Velocity = {air_velocity_new}: {evaporation_coefficient_predicted}")


b1 (slope): 0.003877092374457532
b0 (intercept): 0.03631897086174829
Regression Equation: ŷ = 0.03631897086174829 + 0.003877092374457532*x
R-squared value: 0.09251242604921606
Predicted Evaporation Coefficient for Air Velocity = 250: 1.0055920644761314


In [None]:
#naive bayes
import pandas as pd
import numpy as np

# Define the dataset: 14 weather observations with the Play outcome
data = {
    'Outlook': ['Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Sunny', 'Rainy', 'Overcast', 'Overcast', 'Sunny'],
    'Temp': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Hot', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'Windy': ['f', 't', 'f', 'f', 'f', 't', 't', 'f', 'f', 'f', 't', 't', 'f', 't'],
    'Play': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
}
df = pd.DataFrame(data)

# Observation to classify, in column order: Outlook, Temp, Humidity, Windy
test_input = ['Sunny', 'Mild', 'High', 't']
outlook_value, temp_value, humidity_value, windy_value = test_input

# Prior probabilities: share of rows belonging to each class
total_rows = len(df)
P_yes = len(df[df['Play'] == 'yes']) / total_rows
P_no = len(df[df['Play'] == 'no']) / total_rows


def conditional_probability(feature, value, target):
    """Return P(feature == value | Play == target) estimated from ``df``.

    The estimate is the number of rows of class ``target`` whose ``feature``
    column equals ``value``, divided by the number of rows of that class.
    """
    class_rows = df[df['Play'] == target]
    matching_rows = class_rows[class_rows[feature] == value]
    return len(matching_rows) / len(class_rows)


# Per-feature conditional probabilities of the test input under each class
P_outlook_sunny_yes = conditional_probability('Outlook', outlook_value, 'yes')
P_temp_mild_yes = conditional_probability('Temp', temp_value, 'yes')
P_humidity_high_yes = conditional_probability('Humidity', humidity_value, 'yes')
P_windy_t_yes = conditional_probability('Windy', windy_value, 'yes')

P_outlook_sunny_no = conditional_probability('Outlook', outlook_value, 'no')
P_temp_mild_no = conditional_probability('Temp', temp_value, 'no')
P_humidity_high_no = conditional_probability('Humidity', humidity_value, 'no')
P_windy_t_no = conditional_probability('Windy', windy_value, 'no')

# Class scores: product of the conditional probabilities times the class prior
likelihood_yes = (
    P_outlook_sunny_yes
    * P_temp_mild_yes
    * P_humidity_high_yes
    * P_windy_t_yes
    * P_yes
)
likelihood_no = (
    P_outlook_sunny_no
    * P_temp_mild_no
    * P_humidity_high_no
    * P_windy_t_no
    * P_no
)

# The class with the strictly larger score wins; a tie goes to 'no'
result = "Play = yes" if likelihood_yes > likelihood_no else "Play = no"

print("Test input is classified as:", result)

Test input is classified as: Play = no
