### A1, A2, A3

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

def load_data(file_path, sheet_name):
    """Read one worksheet of an Excel workbook into a pandas DataFrame.

    Args:
        file_path: Path of the Excel workbook to open.
        sheet_name: Name of the worksheet to read.

    Returns:
        The DataFrame produced by ``pd.read_excel`` for that sheet.
    """
    return pd.read_excel(file_path, sheet_name=sheet_name)

def dimentionality(A):
    """Print the size of the second axis (number of columns) of ``A``.

    Args:
        A: Array-like object exposing ``shape`` with at least two axes.
    """
    n_columns = A.shape[1]
    print(f"Dimensionality = {n_columns}")

def vectors(A):
    """Print the size of the first axis (number of row vectors) of ``A``.

    Args:
        A: Array-like object exposing ``shape``.
    """
    n_rows = A.shape[0]
    print(f"Vectors = {n_rows}")

def rank(A):
    """Print the matrix rank of ``A`` as computed by ``np.linalg.matrix_rank``.

    Args:
        A: Matrix whose rank is reported.
    """
    # A distinct local name avoids shadowing this function inside its own body.
    matrix_rank = np.linalg.matrix_rank(A)
    print(f"Rank = {matrix_rank}")

def inverse(A):
    """Compute, print and return the Moore-Penrose pseudo-inverse of ``A``.

    Args:
        A: Matrix to invert; it does not have to be square.

    Returns:
        The pseudo-inverse produced by ``np.linalg.pinv``.
    """
    pseudo_inverse = np.linalg.pinv(A)
    print(f"Inverse:\n{pseudo_inverse}")
    return pseudo_inverse

def cost(inverse_A, C):
    """Print the product of ``inverse_A`` and ``C``.

    Args:
        inverse_A: (Pseudo-)inverse of the coefficient matrix.
        C: Right-hand-side values multiplied by ``inverse_A``.
    """
    # A distinct local name avoids shadowing this function inside its own body.
    solution = np.dot(inverse_A, C)
    print(f"Cost = {solution}")

def classify_customers(df):
    """Label customers RICH/POOR by payment and print a classification report.

    A customer is 'RICH' when 'Payment (Rs)' exceeds 200 and 'POOR'
    otherwise. A logistic-regression model is trained on the three purchase
    quantity columns and evaluated on a 30% hold-out split.

    Args:
        df: DataFrame containing the columns 'Payment (Rs)', 'Candies (#)',
            'Mangoes (Kg)' and 'Milk Packets (#)'.

    Side effects:
        Adds (or overwrites) the 'Class' column on ``df`` and prints the
        report to standard output.
    """
    # Mark customers as 'RICH' or 'POOR' based on payment
    df['Class'] = np.where(df['Payment (Rs)'] > 200, 'RICH', 'POOR')

    # Prepare features (X) and labels (y)
    X = df[['Candies (#)', 'Mangoes (Kg)', 'Milk Packets (#)']].values
    y = df['Class'].values

    # Split BEFORE scaling so the hold-out rows never influence preprocessing.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Fit the scaler on the training rows only, then reuse its statistics for
    # the test rows; fitting on the full data set would leak test information.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train a logistic regression model
    model = LogisticRegression()
    model.fit(X_train_scaled, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test_scaled)

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

# Workbook location (machine-specific absolute path) and the sheet to analyse
file_path = r"C:\Users\Gurram Bhavya Reddy\Downloads\Lab Session Data (1).xlsx"
sheet_name = "Purchase data"

# Load the sheet into a DataFrame
df = load_data(file_path, sheet_name)

# A: columns at positions 1-3 as a 2-D array; C: column at position 4 as a 1-D array
# (presumably the purchase quantities and the payment — verify against the sheet layout)
A = df.iloc[:, 1:4].values
C = df.iloc[:, 4].values

# Print the number of columns of A
dimentionality(A)

# Print the number of rows of A
vectors(A)

# Print the matrix rank of A
rank(A)

# Compute, print and keep the pseudo-inverse of A
inverse_A = inverse(A)

# Print pinv(A) @ C
cost(inverse_A, C)

# Label customers RICH/POOR and print a classification report
# (this call also adds a 'Class' column to df)
classify_customers(df)


### A4

In [37]:
import pandas as pd

def load_data(file_path, sheet_name):
    """Read one worksheet of an Excel workbook into a pandas DataFrame.

    Args:
        file_path: Path of the Excel workbook to open.
        sheet_name: Name of the worksheet to read.

    Returns:
        The DataFrame produced by ``pd.read_excel`` for that sheet.
    """
    return pd.read_excel(file_path, sheet_name=sheet_name)

def mean(a):
    """Print the mean of ``a`` as returned by its ``mean()`` method.

    Args:
        a: pandas Series (or any object exposing ``mean()``).
    """
    average = a.mean()
    print(f"Mean = {average}")

def variance(a):
    """Print the variance of ``a`` as returned by its ``var()`` method.

    Args:
        a: pandas Series (or any object exposing ``var()``).
    """
    spread = a.var()
    print(f"Variance = {spread}")

def wed_mean(df):
    """Print the mean 'Price' over the rows whose 'Day' equals 'Wed'.

    Args:
        df: DataFrame with 'Day' and 'Price' columns.
    """
    is_wednesday = df['Day'] == 'Wed'
    wednesday_prices = df.loc[is_wednesday, 'Price']
    print(f"Sample mean (Wednesday) = {wednesday_prices.mean()}")

def apr_mean(df):
    """Print the mean 'Price' over the rows whose 'Month' equals 'Apr'.

    Args:
        df: DataFrame with 'Month' and 'Price' columns.
    """
    is_april = df['Month'] == 'Apr'
    april_prices = df.loc[is_april, 'Price']
    print(f"Sample mean (April) = {april_prices.mean()}")

def loss_probability(df):
    """Print the share of rows whose 'Chg%' value is negative.

    Args:
        df: DataFrame with a 'Chg%' column holding either numbers or
            percent strings such as '-0.41%'.

    Side effects:
        A non-numeric 'Chg%' column is replaced on ``df`` by its parsed float
        form (percent value divided by 100). A column that is already numeric
        is left untouched, so repeated calls do not rescale the stored values.
        With an empty frame the printed probability is ``nan%``.
    """
    change = df['Chg%']
    if not pd.api.types.is_numeric_dtype(change):
        change = change.str.rstrip('%').astype(float) / 100
        # Keep the parsed column on the frame so code that runs after this
        # call can compare the values numerically.
        df['Chg%'] = change

    total = len(df)  # Total number of entries
    loss = (change < 0).sum()  # Count of losses (NaN never counts as a loss)
    # Guard the empty frame instead of dividing by zero.
    probability = loss / total if total else float('nan')
    print(f"Probability of making a loss: {probability:.2%}")

def wed_profit_probability(df):
    """Print the share of Wednesday rows whose 'Chg%' value is positive.

    Args:
        df: DataFrame with a 'Day' column and a 'Chg%' column holding either
            numbers or percent strings such as '0.41%'. ``df`` is not
            modified.

    With no Wednesday rows the printed probability is ``nan%``.
    """
    # Parse the change column into a local series BEFORE filtering, so the
    # Wednesday subset is always numeric and the caller's frame stays intact.
    change = df['Chg%']
    if not pd.api.types.is_numeric_dtype(change):
        change = change.str.rstrip('%').astype(float) / 100

    wed_change = change[df['Day'] == 'Wed']  # Changes recorded on Wednesdays

    total = len(wed_change)  # Total number of entries on Wednesdays
    profit = (wed_change > 0).sum()  # Count of profits on Wednesdays
    # Guard the no-Wednesday case instead of dividing by zero.
    probability = profit / total if total else float('nan')
    print(f"Probability of making a profit on Wednesday: {probability:.2%}")

def wed_conditional_profit_probability(df):
    """Print P(profit | Wednesday): positive 'Chg%' rows among Wednesday rows.

    Args:
        df: DataFrame with a 'Day' column and a 'Chg%' column holding either
            numbers or percent strings such as '0.41%'. ``df`` is not
            modified.

    With no Wednesday rows the printed probability is ``nan%``.
    """
    # Parse the change column into a local series BEFORE filtering, so the
    # Wednesday subset is always numeric and the caller's frame stays intact.
    change = df['Chg%']
    if not pd.api.types.is_numeric_dtype(change):
        change = change.str.rstrip('%').astype(float) / 100

    wed_change = change[df['Day'] == 'Wed']  # Changes recorded on Wednesdays

    total = len(wed_change)  # Size of the conditioning event (Wednesday rows)
    profit = (wed_change > 0).sum()  # Wednesday rows that closed with a gain
    # Guard the no-Wednesday case instead of dividing by zero.
    probability = profit / total if total else float('nan')
    print(f"Conditional Probability of making a profit on Wednesday: {probability:.2%}")

# Workbook location (machine-specific absolute path)
file_path = r"C:\Users\Gurram Bhavya Reddy\Downloads\Lab Session Data (1).xlsx"

# Worksheet holding the stock price records
sheet_name = "IRCTC Stock Price"

# Load the sheet into a DataFrame
df = load_data(file_path, sheet_name)

# Select the column at position 3 (the fourth column) for the summary statistics
# (presumably the 'Price' column used by the helpers above — verify against the sheet)
df_column = df.iloc[:, 3]  # Adjust this index if needed

# Print the mean of the selected column
mean(df_column)

# Print the variance of the selected column
variance(df_column)

# Print the mean 'Price' over rows where 'Day' is 'Wed'
wed_mean(df)

# Print the mean 'Price' over rows where 'Month' is 'Apr'
apr_mean(df)

# Print the share of rows with a negative 'Chg%'
loss_probability(df)

# Print the share of Wednesday rows with a positive 'Chg%'
wed_profit_probability(df)

# Print the same Wednesday share, labelled as a conditional probability
wed_conditional_profit_probability(df)


Mean = 1560.6634538152612
Variance = 58732.36535253918
Sample mean (Wednesday) = 1550.7060000000001
Sample mean (April) = 1698.9526315789474
Probability of making a loss: 49.80%
Probability of making a profit on Wednesday: 42.00%
Conditional Probability of making a profit on Wednesday: 42.00%
