In [25]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import random

In [26]:
# Set seeds for reproducibility.
# NumPy's global generator and the stdlib `random` module are seeded separately
# because later cells draw from both (np.random.* and random.choice).
np.random.seed(42)
random.seed(42)

# Problem 1

In [27]:
# 5x4 array of random integers; randint's upper bound (51) is exclusive,
# so the values fall in 1..50.
array_2d = np.random.randint(1, 51, size=(5, 4))
array_2d

array([[39, 29, 15, 43],
       [ 8, 21, 39, 19],
       [23, 11, 11, 24],
       [36, 40, 24,  3],
       [22,  2, 24, 44]])

In [28]:
# Anti-diagonal: start at the top-right corner and step down-left, stopping as
# soon as either the rows or the columns run out (the array is not square).
diag_len = min(array_2d.shape)
row_idx = np.arange(diag_len)
anti_diagonal = array_2d[row_idx, array_2d.shape[1] - 1 - row_idx]
anti_diagonal

array([43, 39, 11, 36])

In [29]:
# Largest value in each row (reduce across the column axis).
max_each_row = np.max(array_2d, axis=1)
max_each_row

array([43, 39, 24, 40, 44])

In [30]:
# Keep only the entries that do not exceed the mean of the whole array.
overall_mean = np.mean(array_2d)
at_or_below_mean = array_2d <= overall_mean
elements_leq_mean = array_2d[at_or_below_mean]
elements_leq_mean

array([15,  8, 21, 19, 23, 11, 11,  3, 22,  2])

In [31]:
def numpy_boundary_traversal(matrix):
    """Return the outer ring of a 2-D array in clockwise order.

    The walk starts at the top-left corner and visits the top row left to
    right, the right column top to bottom, the bottom row right to left and
    finally the left column bottom to top. Every boundary element appears
    exactly once.

    Parameters
    ----------
    matrix : array-like
        Any 2-D array-like (ndarray, nested list, ...).

    Returns
    -------
    list
        Boundary elements as plain Python scalars; ``[]`` when the matrix has
        no rows or no columns.

    Raises
    ------
    ValueError
        If ``matrix`` is not two-dimensional.
    """
    matrix = np.asarray(matrix)
    if matrix.ndim != 2:
        raise ValueError(
            f"expected a 2-D array, got {matrix.ndim} dimension(s)"
        )

    rows, cols = matrix.shape
    # An empty matrix has no boundary; indexing row 0 / column -1 would raise.
    if rows == 0 or cols == 0:
        return []

    # Top row, left to right.
    boundary = matrix[0, :].tolist()
    if rows > 1:
        # Right column, skipping the corner already emitted by the top row.
        boundary.extend(matrix[1:, -1].tolist())
        if cols > 1:
            # Bottom row right to left, skipping the bottom-right corner.
            boundary.extend(matrix[-1, -2::-1].tolist())
    if rows > 2 and cols > 1:
        # Left column bottom to top, skipping both corners.
        boundary.extend(matrix[-2:0:-1, 0].tolist())
    return boundary

# Boundary walk of the Problem 1 array; the function returns a plain Python list.
boundary_elements = numpy_boundary_traversal(array_2d)
boundary_elements

[39, 29, 15, 43, 19, 24, 3, 44, 24, 2, 22, 36, 23, 8]

# Problem 2

In [32]:
# 20 floats drawn uniformly between 0 and 10 from NumPy's global generator.
array_1d = np.random.uniform(0, 10, size=20)
array_1d

array([9.38552709e+00, 7.78765841e-03, 9.92211559e+00, 6.17481510e+00,
       6.11653160e+00, 7.06630522e-02, 2.30624250e-01, 5.24774660e+00,
       3.99860972e+00, 4.66656632e-01, 9.73755519e+00, 2.32771340e+00,
       9.06064345e-01, 6.18386009e+00, 3.82461991e+00, 9.83230886e+00,
       4.66762893e+00, 8.59940407e+00, 6.80307539e+00, 4.50499252e+00])

In [33]:
# Round every value to two decimal places.
array_1d_rounded = array_1d.round(2)
array_1d_rounded

array([9.39, 0.01, 9.92, 6.17, 6.12, 0.07, 0.23, 5.25, 4.  , 0.47, 9.74,
       2.33, 0.91, 6.18, 3.82, 9.83, 4.67, 8.6 , 6.8 , 4.5 ])

In [34]:
# Summary statistics of the rounded array.
min_val = array_1d_rounded.min()
max_val = array_1d_rounded.max()
median_val = np.median(array_1d_rounded)
# End on an expression so the cell reports the values it just computed.
min_val, max_val, median_val

In [35]:
# Square the entries below 5 (re-rounded to two decimals); keep the rest unchanged.
below_five = array_1d_rounded < 5
squared_rounded = np.round(array_1d_rounded ** 2, 2)
array_1d_modified = np.where(below_five, squared_rounded, array_1d_rounded)
array_1d_modified

array([ 9.39,  0.  ,  9.92,  6.17,  6.12,  0.  ,  0.05,  5.25, 16.  ,
        0.22,  9.74,  5.43,  0.83,  6.18, 14.59,  9.83, 21.81,  8.6 ,
        6.8 , 20.25])

In [36]:
def numpy_alternate_sort(array):
    """Sort ``array`` and interleave it as smallest, largest, 2nd smallest, ...

    Parameters
    ----------
    array : array-like
        Values to arrange; the input is not modified.

    Returns
    -------
    numpy.ndarray
        Array of the same length and dtype as the sorted input, with the
        ascending values at even positions and the descending values at odd
        positions. For an odd length the middle value comes last.
    """
    sorted_arr = np.sort(array)
    count = len(sorted_arr)
    # Even slots take the lower half (plus the middle element when count is odd).
    low_count = (count + 1) // 2

    # empty_like keeps the dtype, including for zero-length input.
    result = np.empty_like(sorted_arr)
    result[0::2] = sorted_arr[:low_count]
    # Odd slots take the largest values, walking down from the top.
    result[1::2] = sorted_arr[::-1][:count // 2]
    return result

alternate_sorted_array = numpy_alternate_sort(array_1d_rounded)
# Trailing expression so the cell shows the rearranged array.
alternate_sorted_array

# Problem 3

In [37]:
# Ten synthetic students: each gets a subject picked with the stdlib `random`
# module and an integer score from NumPy (randint's upper bound 101 is
# exclusive, so scores fall in 50..100).
names = [f"Student{i+1}" for i in range(10)]
subjects = ["Math", "Physics", "Chemistry", "Biology"]
df_students = pd.DataFrame({
    "Name": names,
    "Subject": [random.choice(subjects) for _ in range(10)],
    "Score": np.random.randint(50, 101, size=10)
})
df_students

Unnamed: 0,Name,Subject,Score
0,Student1,Math,67
1,Student2,Math,53
2,Student3,Chemistry,74
3,Student4,Physics,63
4,Student5,Physics,99
5,Student6,Physics,58
6,Student7,Math,75
7,Student8,Math,51
8,Student9,Biology,69
9,Student10,Math,77


In [38]:
def assign_grade(score):
    """Map a numeric score to a letter grade.

    Bands are half-open so fractional scores cannot fall between them:
    A is [90, 100], B is [80, 90), C is [70, 80), D is [60, 70), and
    everything else is F.

    Parameters
    ----------
    score : int or float
        Score on a 0-100 scale.

    Returns
    -------
    str
        One of "A", "B", "C", "D", "F". Scores outside 0-100 (and NaN)
        are graded "F".
    """
    # Anything off the 0-100 scale, including NaN, is graded "F".
    if not 0 <= score <= 100:
        return "F"
    # Descending lower bounds: the first band the score reaches wins, so a
    # value such as 89.5 lands in "B" instead of slipping through to "F".
    if score >= 90:
        return "A"
    if score >= 80:
        return "B"
    if score >= 70:
        return "C"
    if score >= 60:
        return "D"
    return "F"

# Derive each student's letter grade from the numeric score, element by element.
df_students["Grade"] = df_students["Score"].map(assign_grade)
df_students

Unnamed: 0,Name,Subject,Score,Grade
0,Student1,Math,67,D
1,Student2,Math,53,F
2,Student3,Chemistry,74,C
3,Student4,Physics,63,D
4,Student5,Physics,99,A
5,Student6,Physics,58,F
6,Student7,Math,75,C
7,Student8,Math,51,F
8,Student9,Biology,69,D
9,Student10,Math,77,C


In [39]:
# Rank students from highest to lowest score; sort_values returns a new frame.
df_sorted = df_students.sort_values("Score", ascending=False)
df_sorted

Unnamed: 0,Name,Subject,Score,Grade
4,Student5,Physics,99,A
9,Student10,Math,77,C
6,Student7,Math,75,C
2,Student3,Chemistry,74,C
8,Student9,Biology,69,D
0,Student1,Math,67,D
3,Student4,Physics,63,D
5,Student6,Physics,58,F
1,Student2,Math,53,F
7,Student8,Math,51,F


In [40]:
# Mean score per subject, with "Subject" kept as an ordinary column.
avg_score_per_subject = df_students.groupby("Subject", as_index=False)["Score"].mean()
avg_score_per_subject

Unnamed: 0,Subject,Score
0,Biology,69.0
1,Chemistry,74.0
2,Math,64.6
3,Physics,73.333333


In [41]:
def pandas_filter_pass(dataframe):
    """Return the rows of ``dataframe`` whose "Grade" is "A" or "B".

    The original index labels are kept on the returned rows.
    """
    passing_grades = ["A", "B"]
    is_passing = dataframe["Grade"].isin(passing_grades)
    return dataframe.loc[is_passing]

# Students counted as passing, i.e. those graded "A" or "B".
df_pass = pandas_filter_pass(df_students)
df_pass

Unnamed: 0,Name,Subject,Score,Grade
4,Student5,Physics,99,A


# Problem 4: Synthetic movie reviews

In [42]:
# Sentence templates the synthetic reviews are sampled from.
positive_templates = [
    "I loved this movie, it was fantastic and moving.",
    "A wonderful film with great performances and story.",
    "An excellent movie, truly inspiring and well-made.",
    "This film was amazing; I enjoyed every moment.",
    "Brilliant direction and outstanding acting."
]
negative_templates = [
    "I hated this movie, it was boring and too long.",
    "A terrible film with poor acting and storyline.",
    "An awful movie, disappointing and poorly made.",
    "This film was horrible; I regretted watching it.",
    "Bad direction and terrible performances."
]

# 50 positive draws followed by 50 negative draws, in that order, so the
# sequence of random.choice calls (and therefore the seeded result) is the same
# as with explicit loops. Comprehensions keep the throwaway `_` out of the
# notebook namespace, where IPython uses `_` for the previous cell's output.
reviews = [random.choice(positive_templates) for _ in range(50)]
reviews += [random.choice(negative_templates) for _ in range(50)]
# Labels line up with the two halves of `reviews`.
sentiments = ["positive"] * 50 + ["negative"] * 50

df_reviews = pd.DataFrame({"Review": reviews, "Sentiment": sentiments})
df_reviews

Unnamed: 0,Review,Sentiment
0,"I loved this movie, it was fantastic and moving.",positive
1,"I loved this movie, it was fantastic and moving.",positive
2,A wonderful film with great performances and s...,positive
3,A wonderful film with great performances and s...,positive
4,Brilliant direction and outstanding acting.,positive
...,...,...
95,This film was horrible; I regretted watching it.,negative
96,"An awful movie, disappointing and poorly made.",negative
97,A terrible film with poor acting and storyline.,negative
98,A terrible film with poor acting and storyline.,negative


In [43]:
# Split the raw reviews BEFORE building the vocabulary: fitting the vectorizer
# on the full corpus would let the held-out reviews influence the features
# (test-set leakage). `df_reviews` comes from the previous cell, so it is not
# rebuilt here.
y = df_reviews["Sentiment"]
reviews_train, reviews_test, y_train, y_test = train_test_split(
    df_reviews["Review"], y, test_size=0.2, random_state=42
)

vectorizer = CountVectorizer(max_features=500, stop_words='english')
X_train = vectorizer.fit_transform(reviews_train)  # vocabulary learned from train only
X_test = vectorizer.transform(reviews_test)        # reuse that vocabulary unchanged
# Whole corpus in the train-fitted feature space; kept so the name `X` stays defined.
X = vectorizer.transform(df_reviews["Review"])

nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)
y_pred = nb_model.predict(X_test)
accuracy_nb = accuracy_score(y_test, y_pred)
accuracy_nb

1.0

In [44]:
def predict_review_sentiment(model, vectorizer, review):
    """Predict the label of a single review.

    ``review`` is wrapped in a one-element list, transformed with the
    already-fitted ``vectorizer``, and passed to ``model.predict``; the first
    (only) prediction is returned.
    """
    features = vectorizer.transform([review])
    predictions = model.predict(features)
    return predictions[0]
    

# Problem 5: Synthetic product feedback

In [45]:
# Sentence templates the synthetic product feedback is sampled from.
positive_feedback_templates = [
    "The product quality is excellent and I am very satisfied.",
    "Great product, works as expected and exceeds expectations.",
    "I love this product, highly recommend it to others.",
    "Fantastic item, durable and well-designed.",
    "Superb performance and value for money."
]
negative_feedback_templates = [
    "The product is terrible and broke after first use.",
    "Very disappointed, the item is of poor quality.",
    "I hate this product, waste of money.",
    "Awful experience, product didn't meet any expectations.",
    "Bad build and unreliable performance."
]

# 50 "good" draws followed by 50 "bad" draws, in that order, so the sequence
# of random.choice calls (and therefore the seeded result) is the same as with
# explicit loops. Comprehensions keep the throwaway `_` out of the notebook
# namespace, where IPython uses `_` for the previous cell's output.
feedbacks = [random.choice(positive_feedback_templates) for _ in range(50)]
feedbacks += [random.choice(negative_feedback_templates) for _ in range(50)]
# Labels line up with the two halves of `feedbacks`.
labels = ["good"] * 50 + ["bad"] * 50

df_feedback = pd.DataFrame({"Feedback": feedbacks, "Label": labels})
df_feedback

Unnamed: 0,Feedback,Label
0,"Fantastic item, durable and well-designed.",good
1,The product quality is excellent and I am very...,good
2,The product quality is excellent and I am very...,good
3,The product quality is excellent and I am very...,good
4,"Great product, works as expected and exceeds e...",good
...,...,...
95,Bad build and unreliable performance.,bad
96,"Very disappointed, the item is of poor quality.",bad
97,"I hate this product, waste of money.",bad
98,"Awful experience, product didn't meet any expe...",bad


In [46]:
# Split the raw feedback BEFORE fitting TF-IDF: both the vocabulary and the
# IDF weights are learned from the data, so fitting on the full corpus would
# leak information from the held-out rows into the features.
y_fb = df_feedback["Label"]
feedback_train, feedback_test, y_train_fb, y_test_fb = train_test_split(
    df_feedback["Feedback"], y_fb, test_size=0.25, random_state=42
)

tfidf_vectorizer = TfidfVectorizer(max_features=300, stop_words='english', lowercase=True)
X_train_fb = tfidf_vectorizer.fit_transform(feedback_train)  # fitted on train only
X_test_fb = tfidf_vectorizer.transform(feedback_test)        # same vocabulary / IDF
# Whole corpus in the train-fitted feature space; kept so the name `X_fb` stays defined.
X_fb = tfidf_vectorizer.transform(df_feedback["Feedback"])

lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_fb, y_train_fb)
y_pred_fb = lr_model.predict(X_test_fb)
# The fourth return value is bound to a real name rather than `_`, because
# assigning to `_` in a notebook shadows IPython's "last output" variable.
# The recorded output shows it is None for this averaged call.
precision, recall, f1, support = precision_recall_fscore_support(
    y_test_fb, y_pred_fb, average='binary', pos_label='good'
)
print(precision, recall, f1, support)

1.0 1.0 1.0 None


In [47]:
def text_preprocess_vectorize(texts, vectorizer):
    """Turn raw text into features with an already-fitted vectorizer.

    Parameters
    ----------
    texts : str or iterable of str
        One document or a collection of documents. A single string is
        treated as one document.
    vectorizer : object
        Any object exposing ``transform(documents)``; it is not fitted or
        modified here beyond that call.

    Returns
    -------
    Whatever ``vectorizer.transform`` returns for the documents.
    """
    if isinstance(texts, str):
        # Wrap a lone document so the vectorizer receives a collection with one
        # entry rather than a bare string.
        texts = [texts]
    return vectorizer.transform(texts)

# Thank You