In [None]:
# Q1 : Lists and Loops
# 1.1: The input values to work with
numbers = [15, 8, 22, 7, 31, 4, 17]

# 1.2: Show every even value, one per line, in list order
print("Even numbers:")
for value in numbers:
    if value % 2:
        continue  # non-zero remainder -> odd, not part of this listing
    print(value)

# 1.3: Collect the square of each odd value with a comprehension
squared_odds = [value ** 2 for value in numbers if value % 2 != 0]

# 1.4: Show the list of squares
print("Squares of odd numbers:")
print(squared_odds)

In [None]:
# Q2 : Strings and Dictionaries
# 2.1: Text to analyse
sentence = "The book was interesting because the book covered many topics and the topics discussed in the book were engaging"

# 2.2: Normalise the case, then break the text apart on whitespace
words = sentence.lower().split()

# 2.3: Tally each word. dict.get supplies 0 the first time a word is seen,
# so keys end up in order of first appearance.
word_counts = {}
for token in words:
    word_counts[token] = word_counts.get(token, 0) + 1

# 2.4: Show the tally
print("Word counts:")
print(word_counts)

In [None]:
# Q3 : Functions and Conditional logic
# 3.1: Define the function to check for prime
def is_prime(n):
    """Return True if ``n`` is a prime number, otherwise False.

    Uses trial division: 2 is handled on its own, then only odd candidates
    up to the integer square root of ``n`` are tried.

    Args:
        n: The number to test. Values below 2 are never prime.

    Returns:
        bool: True when ``n`` is at least 2 and no candidate divisor up to
        its square root divides it evenly.
    """
    import math  # local import: this cell has no import section of its own

    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        # Every even number other than 2 has 2 as a divisor.
        return False
    # math.isqrt is exact for arbitrarily large integers, whereas
    # int(n ** 0.5) goes through a float: it can round below the true root
    # (missing a divisor) and raises OverflowError once n exceeds float range.
    limit = math.isqrt(int(n))
    for i in range(3, limit + 1, 2):
        if n % i == 0:
            return False
    return True

# 3.2: Candidate values to screen
nums = [2, 4, 5, 10, 13, 17, 20, 23]

# 3.3: Keep only the values that is_prime accepts, preserving their order
prime_nums = [candidate for candidate in nums if is_prime(candidate)]

# 3.4: Show the primes that were found
print("Prime numbers:")
print(prime_nums)

In [None]:
# Q4 : NumPy Array Operations and Statistics
import numpy as np

# 4.1: Draw a (10, 5) grid of random integer scores between 50 and 100
# (randint's upper bound is exclusive, hence 101)
np.random.seed(0)  # fixed seed so the same scores come out on every run
n_students, n_subjects = 10, 5
scores = np.random.randint(50, 101, size=(n_students, n_subjects))

# 4.2.1: Mean along each row -> one average per student
student_averages = scores.mean(axis=1)

# 4.2.2: Maximum down each column -> top score per subject
max_per_subject = scores.max(axis=0)

# 4.2.3: Attach the averages as one extra column on the right
final_array = np.column_stack([scores, student_averages])

# 4.2.4: Show the combined array, then the per-subject maxima
print("Final Scores Array (Last column = Student Average):")
print(final_array)

print("\nMaximum score per subject:")
print(max_per_subject)

In [None]:
# Q5 : Saving Data to CSV
import numpy as np
import csv

# Rebuild the Question 4 data with the same seed so this cell does not
# depend on variables left over from an earlier cell
np.random.seed(0)
scores = np.random.randint(50, 101, size=(10, 5))
student_averages = scores.mean(axis=1)
final_array = np.column_stack([scores, student_averages])

# 5.1: Column titles: one per subject, then the average column
header = ["Subject1", "Subject2", "Subject3", "Subject4", "Subject5", "Average"]

# 5.2: Write the header row followed by one row per student.
# newline="" leaves line-ending handling to the csv module.
with open("student_scores.csv", mode="w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerows([header, *final_array])

print("Data saved to student_scores.csv successfully.")

In [None]:
# Q6 : Creating and Exploring a DataFrame
import pandas as pd

# 6.0: Sample inventory: one entry per product with its price and quantity
data = {
    'Product': ['Pen', 'Notebook', 'Eraser', 'Pencil', 'Marker', 'Ruler', 'Sharpener', 'Highlighter'],
    'Price': [1.5, 3.0, 0.5, 1.0, 2.5, 1.2, 0.8, 2.0],
    'Quantity': [100, 50, 200, 150, 60, 80, 120, 90]
}
df = pd.DataFrame(data)

# 6.1: TotalValue is the element-wise product Price * Quantity
unit_price = df['Price']
units = df['Quantity']
df['TotalValue'] = unit_price * units

# 6.2: Order the rows from the highest TotalValue to the lowest
df_sorted = df.sort_values('TotalValue', ascending=False)
print("DataFrame with TotalValue and sorted:")
print(df_sorted)

# 6.3: Mean Quantity for each distinct Price
quantity_by_price = df.groupby('Price')['Quantity']
avg_quantity_by_price = quantity_by_price.mean()
print("\nAverage Quantity by Price:")
print(avg_quantity_by_price)

In [None]:
# Q7 : Analysis of a Real-World Dataset (Titanic)
import pandas as pd

# 7.1: Read the dataset from train.csv in the working directory
df = pd.read_csv('train.csv')

# 7.2: Count the missing entries in every column
missing_per_column = df.isnull().sum()
print("Missing values per column:")
print(missing_per_column)

# 7.3.1: Rows whose Age is below 18
# (a missing Age fails the comparison, so those rows are left out)
is_minor = df['Age'] < 18
print("\nPassengers under 18:")
print(df[is_minor])

# 7.3.2: Rows that are both female and marked as survived (Survived == 1)
is_female = df['Sex'] == 'female'
has_survived = df['Survived'] == 1
print("\nFemale passengers who survived:")
print(df[is_female & has_survived])

# 7.4: Mean Fare within each Pclass value
fare_by_class = df.groupby('Pclass')['Fare']
avg_fare_by_class = fare_by_class.mean()
print("\nAverage fare by passenger class:")
print(avg_fare_by_class)

In [None]:
# Q8 : Data Cleaning and Aggregation (Heart Disease Dataset)
import pandas as pd
import numpy as np

# 8.1: Load the dataset with given column names
# (names= supplies the header, so the file's first line is read as data)
column_names = ['Age', 'Sex', 'Chest PainType', 'RestingBP', 'Cholesterol', 'FastingBS',
                'RestingECG', 'MaxHR', 'Exercise Angina', 'Oldpeak', 'ST_Slope',
                'NumVessels Fluoro', 'Thalassemia', 'Heart Disease']

df = pd.read_csv('processed.cleveland.data', names=column_names)

# 8.2: Replace '?' with np.nan and convert relevant columns to numeric.
# The result is assigned back rather than using inplace=True, so re-running
# the cell never depends on in-place mutation of an earlier object.
df = df.replace('?', np.nan)

# Convert numeric columns; errors='coerce' turns unparseable entries into NaN
numeric_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak',
                'NumVessels Fluoro', 'Heart Disease']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# 8.3.1: Display count of missing values (before any filling takes place)
print("Missing values per column:")
print(df.isnull().sum())

# 8.3.2: Replace missing values.
# The filled column is assigned back to df. Calling
# df[col].fillna(..., inplace=True) acts on an intermediate object: pandas 2.x
# emits a chained-assignment warning for it, and with Copy-on-Write enabled
# df itself is left unchanged, so the gaps would never be filled.

# NumVessels Fluoro is numeric — use median
df['NumVessels Fluoro'] = df['NumVessels Fluoro'].fillna(df['NumVessels Fluoro'].median())

# Thalassemia is categorical — use mode (mode()[0] is the first most-frequent value)
df['Thalassemia'] = df['Thalassemia'].fillna(df['Thalassemia'].mode()[0])

# 8.3.3: Drop rows with more than 2 missing values
# (i.e. keep rows whose per-row NaN count is at most 2)
df = df[df.isnull().sum(axis=1) <= 2]

# 8.3.4: Group by Heart Disease and compute average cholesterol
# Note: Heart Disease = 0 means no disease; >0 indicates presence.
# The grouping is on the raw column values, so each distinct value gets its own row.
avg_cholesterol = df.groupby('Heart Disease')['Cholesterol'].mean()

# Display the result
print("\nAverage cholesterol by heart disease status:")
print(avg_cholesterol)

In [None]:
# Q9 : Visualizing Student Scores
import pandas as pd
import matplotlib.pyplot as plt

# 9.1: Scores of five students in four subjects
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Math': [85, 78, 92, 88, 76],
    'Science': [90, 82, 89, 94, 75],
    'English': [78, 85, 88, 80, 82],
    'History': [84, 80, 91, 79, 77]
}
df = pd.DataFrame(data)

# 9.2: Bar chart of the Math scores, one bar per student
# (explicit figure/axes objects instead of the implicit pyplot state)
fig_bar, ax_bar = plt.subplots(figsize=(8, 5))
ax_bar.bar(df['Name'], df['Math'], color='skyblue')
ax_bar.set_title('Math Scores of Students')
ax_bar.set_xlabel('Student Name')
ax_bar.set_ylabel('Math Score')
fig_bar.tight_layout()
plt.show()

# 9.3: One line per subject, all drawn on the same axes
subjects = ['Math', 'Science', 'English', 'History']
fig_line, ax_line = plt.subplots(figsize=(10, 6))
for subject in subjects:
    # label= feeds the legend entry for this subject's line
    ax_line.plot(df['Name'], df[subject], marker='o', label=subject)

ax_line.set_title('Student Performance Across Subjects')
ax_line.set_xlabel('Student Name')
ax_line.set_ylabel('Score')
ax_line.legend()
ax_line.grid(True)
fig_line.tight_layout()
plt.show()

In [None]:
# Q10 : Visualizing Random Data Distribution
import numpy as np
import matplotlib.pyplot as plt

# 10.1: Draw 1000 samples from a normal distribution with the chosen
# centre (mean) and spread (std_dev)
mean = 50
std_dev = 10
n_samples = 1000
data = np.random.normal(loc=mean, scale=std_dev, size=n_samples)

# 10.2 & 10.3: Histogram of the samples on explicit figure/axes objects
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(data, bins=20, color='skyblue', edgecolor='black')

# Dashed red marker at the configured mean (the loc parameter above,
# not the mean computed from the drawn samples)
ax.axvline(mean, color='red', linestyle='dashed', linewidth=2, label=f'Mean = {mean}')

# Title, axis labels, legend and grid
ax.set_title('Histogram of Normally Distributed Data')
ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()