# In [None]:
# --- Data Processing with NumPy and Pandas ---

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ------------------ NumPy Part ------------------

# 1. Matrix Operations
# Draw a 3x3 matrix of random integers from [1, 100) (upper bound exclusive).
matrix = np.random.randint(low=1, high=100, size=(3, 3))
print("---Matrix---")
print(matrix)
print("Shape:", matrix.shape)

print("---Transpose---")
print(matrix.transpose())

# The determinant is computed in floating point, so the result is a float
# even though every entry of the matrix is an integer.
det = np.linalg.det(matrix)
print("Determinant:", det)

# Element-wise scaling: every entry is doubled.
matrix_2 = matrix * 2
print("Matrix x2:")
print(matrix_2)

# 2. Array Operations
# Ten evenly spaced points on [1, 50]; dtype=int drops the fractional parts,
# so the resulting integer steps are not perfectly uniform.
arr = np.linspace(1, 50, 10, dtype=int)
print("---Array---")
print(arr)
print("Mean:", np.mean(arr))
print("Std:", np.std(arr))  # np.std defaults to the population form (ddof=0)

# Square the whole array in one vectorized operation instead of looping over
# it element by element, and join the values so the line has no trailing
# separator and is terminated by its own newline.
squares = arr ** 2
print("---Squares---")
print(", ".join(map(str, squares)))

# np.nonzero on the boolean mask yields the same tuple of index arrays as the
# single-argument form of np.where, and is the spelling NumPy recommends.
filt = np.nonzero(arr > 25)
print("\nElements > 25:")
print(arr[filt])

# 3. NumPy Functions and Plot
# Sample the interval [0, 2*pi] at 100 evenly spaced points and evaluate
# sine and cosine on that grid.
arr2 = np.linspace(0, 2 * np.pi, 100)
sin = np.sin(arr2)
cos = np.cos(arr2)

# Draw both curves on one figure; the cosine is dashed so the two remain
# distinguishable without relying on colour alone.
plt.figure(figsize=(10, 6))
plt.plot(arr2, sin, label='sin(x)', color='blue')
plt.plot(arr2, cos, label='cos(x)', color='red', linestyle='--')
# The pi symbol is written as a Unicode escape: the literal character had been
# corrupted into mojibake by an encoding round-trip, and the escape keeps the
# source ASCII-only so it cannot be mangled that way again.
plt.title('Sine and Cosine between 0 and 2\u03c0')
plt.xlabel('Radian')
plt.ylabel('Value')
plt.legend()
plt.show()

# ------------------ Pandas Part ------------------

# 1. DataFrame Creation
# Build a small four-row table from a column-name -> values mapping.
columns = {
    'Name': ['Veli', 'Ayse', 'Mehmet', 'Betul'],
    'Age': [25, 30, 22, 17],
    'Salary': [5000, 6000, 4500, 5200],
}
df = pd.DataFrame(columns)
print(df)

# Column-wise averages.
avg_age, avg_salary = df['Age'].mean(), df['Salary'].mean()
print("Average Age:", avg_age)
print("Average Salary:", avg_salary)

# Rows whose salary is strictly above 5000, selected with a boolean mask.
high_salary = df['Salary'] > 5000
print("\nSalary > 5000:")
print(df.loc[high_salary])

# sort_values returns a new, ascending-sorted frame; df itself is unchanged.
by_age = df.sort_values(by='Age')
print("\nSorted by Age:")
print(by_age)

# 2. Import and Process CSV
# Load the dataset from the working directory and preview its first ten rows.
# NOTE(review): the columns used below (Age, Sex, Survived, Pclass) suggest
# the Kaggle Titanic training set - confirm against the actual file.
data = pd.read_csv('train.csv')
preview = data.head(10)
print(preview)

# Mean age, taken before any imputation (NaN entries are skipped by mean()).
age_column = data['Age']
avg_age_2 = age_column.mean()
print("Average Age:", avg_age_2)

# Replace missing ages with that mean, then report what is still missing
# in every column.
data['Age'] = age_column.fillna(avg_age_2)
missing_per_column = data.isna().sum()
print("---Missing values---")
print(missing_per_column)

# Frequency of each value in 'Sex', and the mean of 'Survived' as a percentage.
sex_counts = data['Sex'].value_counts()
print(sex_counts)
survival_pct = data['Survived'].mean() * 100
print("Survival rate: %", survival_pct)

# 3. Group and Visualization
# Mean of 'Survived' within each 'Pclass' value, scaled to a percentage.
survived_by_class = data.groupby('Pclass')['Survived']
group = survived_by_class.mean() * 100
print(group)

# One bar per class; the labels are set on the axes that pandas returns.
ax = group.plot.bar(figsize=(6, 5))
ax.set_title('Survival Rate by Pclass (%)')
ax.set_xlabel('Pclass')
ax.set_ylabel('Survival Rate (%)')
plt.show()