Problem 1

In [1]:
import numpy as np

# Seeded generator so that "Restart Kernel -> Run All" reproduces the same matrix.
rng = np.random.default_rng(42)

# 5x4 matrix of integers drawn from 1..50 (the upper bound 51 is exclusive).
arr = rng.integers(1, 51, size=(5, 4))
print("Original Array:\n", arr)

# Flipping the columns turns the anti-diagonal into the main diagonal, whose
# length is min(rows, cols). .tolist() yields plain Python ints so the printed
# list shows bare numbers on every NumPy version.
anti_diag = np.fliplr(arr).diagonal().tolist()
print("Anti-diagonal Elements:", anti_diag)

# Row-wise maximum (axis=1 collapses the columns).
max_per_row = np.max(arr, axis=1)
print("Max value in each row:", max_per_row)

# Boolean-mask selection returns the matching elements as a flat 1-D array.
mean_val = np.mean(arr)
less_equal_mean = arr[arr <= mean_val]
print("Elements <= Mean (%.2f):" % mean_val, less_equal_mean)


def numpy_boundary_traversal(matrix):
    """Return the border elements of a 2-D array in clockwise order.

    The walk starts at the top-left corner: top row left-to-right, right
    column downwards, bottom row right-to-left, left column upwards. Each
    border element appears exactly once.

    Args:
        matrix: 2-D NumPy array (its ``shape`` is unpacked into rows, cols).

    Returns:
        list: Border elements as taken from the array. Empty when either
        dimension is zero.
    """
    rows, cols = matrix.shape
    if rows == 0 or cols == 0:
        # Nothing to walk; indexing row 0 / column -1 would raise IndexError.
        return []

    result = list(matrix[0])                        # top row
    if rows > 1:
        result.extend(matrix[1:rows - 1, -1])       # right column, corners excluded
        result.extend(matrix[-1][::-1])             # bottom row, right-to-left
        if cols > 1:
            # With a single column the "left" column IS the right column,
            # so walking it again would duplicate the interior elements.
            result.extend(matrix[1:rows - 1, 0][::-1])
    return result

# Show the clockwise border walk of the Problem 1 matrix.
print("Boundary Traversal:", numpy_boundary_traversal(matrix=arr))


Original Array:
 [[12 45 15 38]
 [35  9 22 19]
 [39 14  7 44]
 [39 18 14 16]
 [23 13 34 24]]
Anti-diagonal Elements: [38, 22, 14, 39]
Max value in each row: [45 35 44 39 34]
Elements <= Mean (24.00): [12 15  9 22 19 14  7 18 14 16 23 13 24]
Boundary Traversal: [12, 45, 15, 38, 19, 44, 16, 24, 34, 13, 23, 39, 39, 35]


Problem 2

In [2]:

# Seeded generator so that "Restart Kernel -> Run All" reproduces the same data.
rng = np.random.default_rng(42)

# 20 floats drawn uniformly from [0, 10).
arr2 = rng.uniform(0, 10, size=20)
print("Original Array (Rounded):", np.round(arr2, 2))

print("Min:", np.min(arr2))
print("Max:", np.max(arr2))
print("Median:", np.median(arr2))

# Build the mask once and reuse it for both the read and the write.
# NOTE: arr2 is modified in place here; from this point on it holds the
# squared values, not the array printed above.
below_five = arr2 < 5
arr2[below_five] = np.square(arr2[below_five])
print("After squaring values < 5:", np.round(arr2, 2))

def numpy_alternate_sort(array):
    """Rearrange values as smallest, largest, 2nd smallest, 2nd largest, ...

    The input is sorted ascending (a sorted copy is used), then elements are
    taken alternately from the low end and the high end. With an odd number
    of elements the middle one comes last.

    Returns:
        np.ndarray built from the interleaved elements.
    """
    ordered = np.sort(array)
    count = len(ordered)
    picked = []
    # One low/high pair per step, moving inwards from both ends.
    for lo in range(count // 2):
        picked += [ordered[lo], ordered[count - 1 - lo]]
    # Odd length: the untouched middle element closes the sequence.
    if count % 2:
        picked.append(ordered[count // 2])
    return np.array(picked)

# arr2 here is the array after the in-place squaring step above.
print("Alternating Sort Result:", numpy_alternate_sort(arr2).round(2))


Original Array (Rounded): [1.15 2.84 0.2  9.62 8.7  4.13 6.19 5.81 5.22 0.93 0.47 8.3  8.66 1.66
 8.36 7.21 3.52 8.36 5.64 8.95]
Min: 0.19731372434415362
Max: 9.623905149674902
Median: 5.725203933595362
After squaring values < 5: [ 1.33  8.05  0.04  9.62  8.7  17.07  6.19  5.81  5.22  0.87  0.22  8.3
  8.66  2.75  8.36  7.21 12.41  8.36  5.64  8.95]
Alternating Sort Result: [ 0.04 17.07  0.22 12.41  0.87  9.62  1.33  8.95  2.75  8.7   5.22  8.66
  5.64  8.36  5.81  8.36  6.19  8.3   7.21  8.05]


Problem 3

In [3]:
import pandas as pd
import random

# Seeded sources so that "Restart Kernel -> Run All" reproduces the same table.
subject_rng = random.Random(42)
score_rng = np.random.default_rng(42)

names = [f"Student{i+1}" for i in range(10)]
subjects = subject_rng.choices(["Math", "Science", "English"], k=10)
# Integer scores in 50..100 (the upper bound 101 is exclusive).
scores = score_rng.integers(50, 101, size=10)

# The 'Grade' column is not built here; it is derived from 'Score' afterwards.
df = pd.DataFrame({
    'Name': names,
    'Subject': subjects,
    'Score': scores,
})

def assign_grade(score):
    """Map a numeric score to a letter grade.

    Cutoffs are inclusive lower bounds: 90 -> 'A', 80 -> 'B', 70 -> 'C',
    60 -> 'D'. A score that meets none of them gets 'F'.
    """
    # Ordered from highest to lowest so the first cutoff met decides the grade.
    for cutoff, letter in ((90, 'A'), (80, 'B'), (70, 'C'), (60, 'D')):
        if score >= cutoff:
            return letter
    return 'F'

# Derive the letter grade for every row from its numeric score.
df['Grade'] = df['Score'].map(assign_grade)

# Highest score first.
print("DataFrame Sorted by Score:\n", df.sort_values('Score', ascending=False))

# Mean score within each subject.
avg_subject = df.groupby("Subject")["Score"].mean()
print("Average Score per Subject:\n", avg_subject)

def pandas_filter_pass(dataframe):
    """Return the rows of ``dataframe`` whose 'Grade' is 'A' or 'B'.

    The input frame is not modified; the original index labels are kept.
    """
    top_grades = dataframe['Grade'].isin(['A', 'B'])
    return dataframe[top_grades]

# Only rows graded 'A' or 'B' are shown.
print("Filtered Records (A or B Grades):\n", pandas_filter_pass(dataframe=df))


DataFrame Sorted by Score:
         Name  Subject  Score Grade
3   Student4  English    100     A
9  Student10  English     99     A
0   Student1     Math     91     A
5   Student6     Math     87     B
8   Student9  Science     83     B
6   Student7     Math     82     B
1   Student2  Science     80     B
7   Student8     Math     67     D
4   Student5  Science     61     D
2   Student3  English     50     F
Average Score per Subject:
 Subject
English    83.000000
Math       81.750000
Science    74.666667
Name: Score, dtype: float64
Filtered Records (A or B Grades):
         Name  Subject  Score Grade
0   Student1     Math     91     A
1   Student2  Science     80     B
3   Student4  English    100     A
5   Student6     Math     87     B
6   Student7     Math     82     B
8   Student9  Science     83     B
9  Student10  English     99     A


Problem 4

In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Toy corpus: 50 copies of one positive sentence and 50 copies of one negative one.
# NOTE(review): because every review is one of two identical strings, held-out
# reviews also occur verbatim in the training set, so the accuracy printed
# below is not a meaningful generalisation estimate.
positive_reviews = ["This movie was great!"] * 50
negative_reviews = ["Terrible movie, not recommended."] * 50
reviews = positive_reviews + negative_reviews
sentiments = ['positive'] * 50 + ['negative'] * 50

df_reviews = pd.DataFrame({'Review': reviews, 'Sentiment': sentiments})
y = df_reviews['Sentiment']

# Split the RAW text first so the vocabulary is learned from training reviews
# only; fitting the vectorizer on the full corpus would leak test text into
# the features. stratify=y keeps the class balance equal in both parts.
reviews_train, reviews_test, y_train, y_test = train_test_split(
    df_reviews['Review'], y, test_size=0.2, random_state=42, stratify=y
)

vectorizer = CountVectorizer(max_features=500, stop_words='english')
X_train = vectorizer.fit_transform(reviews_train)   # fit + transform on train
X_test = vectorizer.transform(reviews_test)         # transform only on test
# Whole corpus in the train-fitted vocabulary; kept so `X` stays defined.
X = vectorizer.transform(df_reviews['Review'])

model = MultinomialNB()
model.fit(X_train, y_train)
preds = model.predict(X_test)
print("Naive Bayes Accuracy:", accuracy_score(y_test, preds))

def predict_review_sentiment(model, vectorizer, review):
    """Predict the label of a single review string.

    Args:
        model: Object with a ``predict`` method accepting the vectorized input.
        vectorizer: Object with a ``transform`` method accepting a list of texts.
        review: The text to classify.

    Returns:
        The first (and only) entry of ``model.predict``'s result.
    """
    # transform() is given a one-element batch, so the prediction has one entry.
    features = vectorizer.transform([review])
    predictions = model.predict(features)
    return predictions[0]

# NOTE(review): none of the words in this sentence occur in the two training
# review strings, so the label presumably reflects only the model's class
# prior rather than the text -- verify before reading anything into it.
print("Example Prediction:", predict_review_sentiment(model, vectorizer, review="What a fantastic film!"))


Naive Bayes Accuracy: 1.0
Example Prediction: negative


Problem 5

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Toy corpus: 50 copies of one positive sentence followed by 50 copies of one negative one.
# NOTE(review): with only two distinct strings, test texts also occur verbatim
# in the training set, so the report below is not a meaningful estimate.
feedbacks = ["Good product and great service!"] * 50 + ["Bad quality, very disappointing."] * 50
labels = ['good'] * 50 + ['bad'] * 50
y = labels

# Split the RAW text first so the TF-IDF vocabulary and IDF weights are learned
# from training texts only; fitting on the full corpus would leak test text
# into the features. stratify=y keeps the class balance equal in both parts.
texts_train, texts_test, y_train, y_test = train_test_split(
    feedbacks, y, test_size=0.25, random_state=0, stratify=y
)

vectorizer = TfidfVectorizer(max_features=300, stop_words='english', lowercase=True)
X_train = vectorizer.fit_transform(texts_train)   # fit + transform on train
X_test = vectorizer.transform(texts_test)         # transform only on test
# Whole corpus in the train-fitted vocabulary; kept so `X` stays defined.
X = vectorizer.transform(feedbacks)

lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print("Logistic Regression Report:\n", classification_report(y_test, y_pred))

def text_preprocess_vectorize(texts, vectorizer):
    """Vectorize ``texts`` with the given vectorizer.

    Only ``vectorizer.transform`` is called, so the vectorizer is used as
    passed in and is not re-fitted here.

    Args:
        texts: Collection of raw text strings.
        vectorizer: Object exposing ``transform``.

    Returns:
        Whatever ``vectorizer.transform(texts)`` returns.
    """
    vectorized = vectorizer.transform(texts)
    return vectorized

# Densify the single-row result so the individual feature weights are printed.
print("Vectorized Output Sample:\n", text_preprocess_vectorize(texts=["Great quality, would buy again!"], vectorizer=vectorizer).toarray())


Logistic Regression Report:
               precision    recall  f1-score   support

         bad       1.00      1.00      1.00        12
        good       1.00      1.00      1.00        13

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

Vectorized Output Sample:
 [[0.         0.         0.         0.70710678 0.         0.70710678
  0.        ]]
