Problem 1a

In [1]:
import csv
import numpy as np
from sklearn.linear_model import LogisticRegression

In [2]:
# Load every passenger record from Titanic.csv as a dict keyed by column name.
# newline='' is passed so the csv module handles line endings itself.
with open('Titanic.csv', encoding='utf-8', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    # Materialise the rows before the file is closed at the end of the block.
    rows = [record for record in reader]

In [3]:
# One independent, empty accumulator list per column that gets parsed from the CSV.
pclass_list, sex_list, age_list, fare_list, survived_list = ([] for _ in range(5))

In [4]:
# Convert every CSV row (a dict of strings) into typed per-column lists.
# The lists are rebuilt from `rows` rather than appended to, so re-running this
# cell cannot duplicate passengers.

# Encoding used for the sex column; any other value is treated as missing.
_SEX_CODES = {'male': 0, 'female': 1}


def _to_float_or_nan(text):
    """Parse `text` as a float, returning np.nan for blank or non-numeric text.

    Only ValueError (what float() raises for unparsable text) is treated as
    "missing"; any other exception is a real problem and is allowed to surface.
    """
    try:
        return float(text.strip())
    except ValueError:
        return np.nan


# pclass and survived are required integer columns
pclass_list = [int(row['pclass']) for row in rows]
survived_list = [int(row['survived']) for row in rows]

# sex: 'male' -> 0, 'female' -> 1 (case/whitespace-insensitive), otherwise np.nan
sex_list = [_SEX_CODES.get(row['sex'].strip().lower(), np.nan) for row in rows]

# age and fare: floats, with np.nan standing in for missing values
age_list = [_to_float_or_nan(row['age']) for row in rows]
fare_list = [_to_float_or_nan(row['fare']) for row in rows]

# Turn the parsed Python lists into numpy arrays so they can be masked and
# averaged column-wise; age and fare are forced to float because they hold NaN.
survived = np.asarray(survived_list)
pclass = np.asarray(pclass_list)
sex = np.asarray(sex_list)
fare = np.asarray(fare_list, dtype=float)
age = np.asarray(age_list, dtype=float)

# Per-gender mean age, computed over the known (non-NaN) ages only.
is_male = sex == 0
is_female = sex == 1
male_avg = np.nanmean(age[is_male])
female_avg = np.nanmean(age[is_female])

# Replace each missing age, in place, with the mean age of that passenger's gender.
age_missing = np.isnan(age)
age[age_missing & is_male] = male_avg
age[age_missing & is_female] = female_avg

print('Mean male age is:', male_avg)
print('Mean female age is:', female_avg)
# Arrays are 0-indexed, so passenger number 41 lives at index 40.
print('The age of the 41st passenger Dr. Arthur Jackson Brewe is:', age[40])

Mean male age is: 30.58522796352584
Mean female age is: 28.68708762886598
The age of the 41st passenger Dr. Arthur Jackson Brewe is: 30.58522796352584


Problem 1b

In [5]:
# Mean fare per passenger class (1, 2, 3), computed over known fares only.
first_class, second_class, third_class = (
    np.nanmean(fare[pclass == cls]) for cls in (1, 2, 3)
)

# Replace each missing fare, in place, with the mean fare of that passenger's class.
fare_missing = np.isnan(fare)
for cls, class_mean in ((1, first_class), (2, second_class), (3, third_class)):
    fare[fare_missing & (pclass == cls)] = class_mean

print('The mean price of the first class:', first_class)
print('The mean price of the second class:', second_class)
print('The mean price of the third class:', third_class)
# Passenger number 1226 lives at index 1225 (0-indexed).
print('The ticket fare of the 1226th passenger Mr. Thomas Storey is:', fare[1225])

The mean price of the first class: 87.50899164086688
The mean price of the second class: 21.179196389891697
The mean price of the third class: 13.302888700564973
The ticket fare of the 1226th passenger Mr. Thomas Storey is: 13.302888700564973


Problem 1c

In [6]:
# Define a z-score normalization function
def z_Norm(x):
    """Standardise `x` to zero mean and unit (population) standard deviation.

    Parameters
    ----------
    x : array-like of numbers

    Returns
    -------
    numpy.ndarray
        (x - mean(x)) / std(x), using np.std's default ddof=0.
        When every element is identical the standard deviation is 0; in that
        case an all-zero float array is returned instead of dividing 0 by 0.
    """
    x = np.asarray(x)
    u = np.mean(x)
    sigma = np.std(x)
    if sigma == 0:
        # Constant input: every value already equals the mean.
        return np.zeros_like(x, dtype=float)
    return (x - u) / sigma

# Z-score the two continuous columns, then rebind `fare` and `age` to the
# standardised versions so later cells pick up the normalised values.
fare_norm, age_norm = z_Norm(fare), z_Norm(age)
fare, age = fare_norm, age_norm

print("Normalized ticket price and age values for the first passenger:", fare[0], age[0])

Normalized ticket price and age values for the first passenger: 3.442579170469809 -0.07058969559145065


Problem 1d

In [7]:
# Construct the feature matrix X (columns: pclass, sex, age, fare) and target vector y (survived)
X = np.column_stack((pclass, sex, age, fare))
y = survived

# Randomly split the data: 80% for training and 20% for testing.
# The shuffle is driven by a fixed seed so that the split, and every number
# derived from it, is identical each time the notebook is run.
SPLIT_SEED = 42
data_num = len(y)
train_size = int(0.8 * data_num)
rng = np.random.default_rng(SPLIT_SEED)
random_indices = rng.permutation(data_num)
train_indices = random_indices[:train_size]
test_indices = random_indices[train_size:]
X_train = X[train_indices]
y_train = y[train_indices]
X_test = X[test_indices]
y_test = y[test_indices]

print(f"Training X set size: {len(X_train)} samples")
print(f"Testing X set size: {len(X_test)} samples")
print(f"Training y set size: {len(y_train)} samples")
print(f"Testing y set size: {len(y_test)} samples")

Training X set size: 1047 samples
Testing X set size: 262 samples
Training y set size: 1047 samples
Testing y set size: 262 samples


Problem 1e

In [8]:
# Fit a logistic-regression classifier on the training split (fit() returns the
# estimator itself), then report its mean accuracy on the held-out test split.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
accuracy_message = f'Mean test accuracy: {clf.score(X_test, y_test)}'
print(accuracy_message)

Mean test accuracy: 0.732824427480916


Problem 2

In [9]:
#Import library
import cv2 as cv
#Create filter matrix: 3x3 kernel with centre weight 8 and all neighbours -1 (weights sum to 0)
filter=np.array([[-1,-1,-1],[-1,8,-1],[-1,-1,-1]])
input_image=cv.imread("Testudo.jpg")
#cv.imread signals failure by returning None rather than raising, so check explicitly
if input_image is None:
    raise FileNotFoundError('Could not read image "Testudo.jpg"')
#Padding: add a one-pixel border of zeros around the two spatial axes (the channel
#axis is left alone), so tmp is 2 rows and 2 columns larger than the loaded image.
#The result is converted to float so the filter output is not limited to the 0-255 range.
tmp=np.pad(input_image,pad_width=((1,1),(1,1),(0,0)),constant_values=0).astype(float)
#Value input_image with padded tmp
input_image=tmp
np.shape(input_image)

(2007, 2007, 3)

In [10]:
#A function that computes the filter response at a single pixel
def convolution(x,y,image,filter):
    """Apply `filter` to the neighbourhood of pixel (x, y), channel by channel.

    The window centred on (x, y) is multiplied element-wise with the kernel and
    the products are summed (the kernel is not flipped). A matrix product must
    not be used here: it mixes rows with columns and yields a different filter.

    Parameters
    ----------
    x, y : int
        Row and column of the window centre. The whole window must lie inside
        `image`, i.e. (x, y) must be far enough from the border.
    image : numpy.ndarray, shape (rows, cols, channels)
    filter : array-like, shape (k_rows, k_cols) with odd side lengths
        The kernel (3x3 in this notebook).

    Returns
    -------
    numpy.ndarray, shape (1, 1, channels)
        The filter response of each channel at (x, y).
    """
    kernel=np.asarray(filter)
    #Half-widths of the window; both are 1 for a 3x3 kernel
    half_rows=kernel.shape[0]//2
    half_cols=kernel.shape[1]//2
    window=image[x-half_rows:x+half_rows+1,y-half_cols:y+half_cols+1,:]
    result=np.zeros((1,1,image.shape[2]))
    #Broadcast the 2-D kernel over the channel axis, then sum over the window
    result[0,0,:]=np.sum(window*kernel[:,:,np.newaxis],axis=(0,1))
    return result

In [17]:
#Apply the filter at every interior pixel of the padded image.
#The output is 2 rows and 2 columns smaller than input_image, i.e. the border
#pixels themselves are skipped and only used as neighbours.
tmp=np.zeros((input_image.shape[0]-2,input_image.shape[1]-2,input_image.shape[2]))
for x in range(1,input_image.shape[0]-1):
    for y in range(1,input_image.shape[1]-1):
        tmp[x-1,y-1,:]=convolution(x,y,input_image,filter)
#Flip the image vertically: axis 0 indexes the rows, so reversing it turns the
#image upside down (axis=1 would instead mirror it left-to-right)
tmp=np.flip(tmp,axis=0)

In [18]:
#Save the image to disk.
#Only channel index 1 of the filtered result is written, as a single-channel image.
#cv.imwrite's return value is used as the success flag, so report failure too
#instead of finishing silently.
if cv.imwrite("Testudo_out.jpg",tmp[:,:,1]):
    print("Testudo_out.jpg has been saved successfully!")
else:
    print("Failed to save Testudo_out.jpg")

Testudo_out.jpg has been saved successfully!


Problem 3

In [13]:
# Read one pair of XOR input bits from the user and pack it into a 1x2 input row
def _read_bit(prompt):
    """Prompt for a single bit and return it as an int.

    Raises
    ------
    ValueError
        If the reply is not an integer, or is an integer other than 0 or 1.
    """
    bit = int(input(prompt))
    if bit not in (0, 1):
        raise ValueError(f"expected 0 or 1, got {bit}")
    return bit


x1 = _read_bit("input the first bit (0 or 1): ")
x2 = _read_bit("input the second bit (0 or 1): ")
X=np.array([[x1,x2]])

In [14]:
# Fixed parameters of the two-layer network (copied from the given code).
# Hidden layer: 2 inputs -> 2 units.
W1 = np.array([[1, -1], [-1, 1]])
b1 = np.array([[0, 0]])
# Output layer: 2 hidden units -> 1 output.
W2 = np.array([[1], [1]])
b2 = np.array([[-0.5]])
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and `x`."""
    rectified = np.maximum(0, x)
    return rectified
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    The direct formula overflows in exp(-x) for large negative `x`. Since
    1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))), and np.logaddexp(0, -x)
    evaluates log(exp(0) + exp(-x)) stably, the rewritten form stays finite
    for inputs of any magnitude.

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1], same shape as `x`.
    """
    return np.exp(-np.logaddexp(0.0, np.negative(x)))

In [15]:
# Forward pass: affine hidden layer -> ReLU -> affine output layer -> sigmoid
H = np.matmul(X, W1) + b1
hidden_activation = relu(H)
Y = np.matmul(hidden_activation, W2) + b2
output = sigmoid(Y)

In [16]:
# Round the sigmoid output to the nearest integer to get the predicted XOR bit
xor_message = f"XOR outcome of ({x1}, {x2}) is {round(output[0,0])}"
print(xor_message)

XOR outcome of (1, 1) is 0
