**A1**

In [1]:
import numpy as np

# Define the dataset
dataset = np.array([
    [20, 6, 2, 386],
    [16, 3, 6, 289],
    [27, 6, 2, 393],
    [19, 1, 2, 110],
    [24, 4, 2, 280],
    [22, 1, 5, 167],
    [15, 4, 2, 271],
    [18, 4, 2, 274],
    [21, 1, 4, 148],
    [16, 2, 4, 198]
])

# Separate the data into two matrices using different nomenclature
features_matrix = dataset[:, :-1] 
payment_vector = dataset[:, -1] 

# Display the matrices
print("The features matrix (X) contains:\n", features_matrix)
print("The payment vector (Y) contains:\n", payment_vector)

# Determine the dimensionality of the vector space
dimensionality = features_matrix.shape[1]  # Number of columns in the features matrix
print("\nThe dimensionality of the vector space is:", dimensionality)

# Calculate the number of vectors in the vector space
num_vectors = features_matrix.shape[0]  # Number of rows in the features matrix
print("The number of vectors in the vector space is:", num_vectors)

# Calculate the rank of the features matrix
matrix_rank = np.linalg.matrix_rank(features_matrix)
print("\nThe rank of the features matrix is:", matrix_rank)

# Calculate the pseudo-inverse of the features matrix
pseudo_inverse = np.linalg.pinv(features_matrix)
print("\nThe pseudo-inverse of the features matrix is:\n", pseudo_inverse)


The features matrix (X) contains:
 [[20  6  2]
 [16  3  6]
 [27  6  2]
 [19  1  2]
 [24  4  2]
 [22  1  5]
 [15  4  2]
 [18  4  2]
 [21  1  4]
 [16  2  4]]
The payment vector (Y) contains:
 [386 289 393 110 280 167 271 274 148 198]

The dimensionality of the vector space is: 3
The number of vectors in the vector space is: 10

The rank of the features matrix is: 3

The pseudo-inverse of the features matrix is:
 [[-0.01008596 -0.03124505  0.01013951  0.0290728   0.0182907   0.01161794
  -0.00771348  0.00095458  0.01743623 -0.00542016]
 [ 0.09059668  0.07263726  0.03172933 -0.09071908 -0.01893196 -0.06926996
   0.05675464  0.03152577 -0.07641966  0.00357352]
 [ 0.00299878  0.15874243 -0.05795468 -0.06609024 -0.06295043  0.03348017
   0.01541831 -0.01070461  0.00029003  0.05938755]]


**A2**

In [None]:
import numpy as np

X = np.dot(pseudo_inverse,payment_vector )
print("\nThe model vector X for predicting the cost of the products:", X)
productcosts = np.dot(features_matrix, X)
print("\nThe cost of each product is as follows:")
for i, cost in enumerate(productcosts):
    print(f"The cost of Product {i+1} is: Rs.{cost:.2f}")


The model vector X for predicting the cost of the products: [ 1. 55. 18.]

The cost of each product is as follows:
The cost of Product 1 is: Rs.386.00
The cost of Product 2 is: Rs.289.00
The cost of Product 3 is: Rs.393.00
The cost of Product 4 is: Rs.110.00
The cost of Product 5 is: Rs.280.00
The cost of Product 6 is: Rs.167.00
The cost of Product 7 is: Rs.271.00
The cost of Product 8 is: Rs.274.00
The cost of Product 9 is: Rs.148.00
The cost of Product 10 is: Rs.198.00


**A3**

In [None]:
import numpy as np

# Input data: Payment amount and items purchased by 10 customers
customer_data = np.array([
    [20, 6, 2, 386],
    [16, 3, 6, 289],
    [27, 6, 2, 393],
    [19, 1, 2, 110],
    [24, 4, 2, 280],
    [22, 1, 5, 167],
    [15, 4, 2, 271],
    [18, 4, 2, 274],
    [21, 1, 4, 148],
    [16, 2, 4, 198]
])

# Function to compute Euclidean distance between two points
def calculate_distance(point1, point2):
    return np.sqrt(np.sum((point1 - point2) ** 2))

# KNN prediction function
def knn_predict(train_data, labels, test_instance, k=3):
    distances = []
    for i, train_instance in enumerate(train_data):
        distance = calculate_distance(train_instance, test_instance)
        distances.append((distance, labels[i]))
    distances = sorted(distances)[:k]
    counts = np.bincount([d[1] for d in distances])
    return np.argmax(counts)

# Labels: 1 if payment > 250, else 0
labels = np.where(customer_data[:, -1] > 200, 1, 0)

# Training data: Remove the payment column
train_data = customer_data[:, :-1]

# Predict and print classes for each customer
print("Predicted Classes:")
for i, customer_instance in enumerate(train_data):
    predicted_class = knn_predict(train_data, labels, customer_instance)
    if predicted_class == 1:
        print(f"Customer {i+1}: RICH")
    else:
        print(f"Customer {i+1}: POOR")


Predicted Classes:
Customer 1: RICH
Customer 2: RICH
Customer 3: RICH
Customer 4: POOR
Customer 5: RICH
Customer 6: POOR
Customer 7: RICH
Customer 8: RICH
Customer 9: POOR
Customer 10: RICH


**A4**

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statistics as st

# Reading the excel file IRCTC Stock Price from the file path 
file_path = "D:\\SEM4\\ML\\IRCTC Stock Price.xlsx"
stock_data = pd.read_excel(file_path)

# Calculating mean and variance of the 'Price' column using numpy
price_mean = st.mean(stock_data['Price'])
price_variance = st.variance(stock_data['Price'])
print("The Mean of the column Price data is:", price_mean)
print("The Variance of the column Price data is:", price_variance)

# Convert 'Date' column to datetime
stock_data['Date'] = pd.to_datetime(stock_data['Date'])

# Selecting the 'Price' column data for Wednesdays and finding the sample mean
wednesday_prices = stock_data[stock_data['Day'] == 'Wed']['Price']
wednesday_mean_price = st.mean(wednesday_prices)
print("The Sample mean of Wednesday for the column prices is:", wednesday_mean_price)

# Selecting the 'Price' data for the month of April and calculating the sample mean
april_prices = stock_data[stock_data['Month'] == 'Apr']['Price']
april_mean_price = st.mean(april_prices)
print("The Sample mean of April for the column prices is:", april_mean_price)

# Probability for experiencing a loss 
price_changes = stock_data['Chg%'].apply(lambda x: float(x.strip('%')) if isinstance(x, str) else x)
loss_probability = len(price_changes[price_changes < 0]) / len(price_changes)
print("The Probability of experiencing a loss in the stock is:", loss_probability)

# Probability of making a profit on Wednesdays
profitable_wednesdays = len(wednesday_prices[wednesday_prices.diff() > 0]) / len(wednesday_prices)
print("The Probability of making a profit on Wednesdays is:", profitable_wednesdays)

# Conditional probability of making profit, given that today is Wednesday
conditional_profit_probability = len(wednesday_prices[wednesday_prices.diff() > 0]) / len(wednesday_prices)
print("The Conditional probability of making a profit on Wednesday is:", conditional_profit_probability)

# Scatter plot of Chg% data against the day of the week
plt.figure(figsize=(10, 6))
sns.scatterplot(data=stock_data, x='Day', y='Chg%')
plt.title('Scatter plot of Chg% data against the day of the week')
plt.xlabel('Day of the week')
plt.ylabel('Chg%')
plt.show()

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


ImportError: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.