In [None]:
# Pandas supplies the Series and DataFrame types used for data manipulation
# and analysis throughout this notebook.
# NumPy is imported alongside it for numerical helpers.

# If either package is missing, uncomment and run the line below
# (%pip installs into the environment of the running kernel):
# %pip install pandas numpy

import pandas as pd
import numpy as np

# Reached only when both imports above succeeded.
print("✅ Libraries imported successfully!")


In [None]:
# A Series behaves like one table column: a sequence of values, each paired
# with an index label. With no index supplied, pandas numbers the rows itself.

nums = list(range(1, 6))
series_default = pd.Series(data=nums)

# Show values together with their automatically assigned index
print(series_default)


In [None]:
# The index does not have to be numeric: here each value gets a letter label.
nums = list(range(1, 6))
letter_labels = list('abcde')
series_custom = pd.Series(data=nums, index=letter_labels)

print(series_custom)


In [None]:
# Values may be strings as well; the index here starts at 1 instead of 0.
fruits = ['Orange', 'Banana', 'Mango']
one_based_labels = list(range(1, 4))
fruit_series = pd.Series(data=fruits, index=one_based_labels)
print(fruit_series)


In [None]:
# Building a Series from a dict: every key turns into an index label and
# every value into the entry stored under that label.
person = dict(name='Asabeneh', country='Finland', city='Helsinki')
series_from_dict = pd.Series(data=person)
print(series_from_dict)


In [None]:
# A single scalar is repeated once for every label in the index.
repeat_labels = [1, 2, 3]
constant_series = pd.Series(data=10, index=repeat_labels)
print(constant_series)


In [None]:
# np.linspace(start, stop, num) yields `num` evenly spaced values,
# here 10 points running from 5 to 20.
evenly_spaced = np.linspace(start=5, stop=20, num=10)
s = pd.Series(evenly_spaced)
print(s)


In [None]:
# A DataFrame is a two-dimensional table, comparable to a spreadsheet.
# Here it is built from a list of rows plus a separate list of column names.

person_rows = (
    ('Asabeneh', 'Finland', 'Helsinki'),
    ('David', 'UK', 'London'),
    ('John', 'Sweden', 'Stockholm'),
)
data = [list(record) for record in person_rows]
column_names = ['Name', 'Country', 'City']

df1 = pd.DataFrame(data=data, columns=column_names)
print(df1)


In [None]:
# Same table, built column-wise: each dict key is a column name and each
# value is the list of entries for that column.
names = ['Asabeneh', 'David', 'John']
countries = ['Finland', 'UK', 'Sweden']
cities = ['Helsinki', 'London', 'Stockholm']
data = {'Name': names, 'Country': countries, 'City': cities}

df2 = pd.DataFrame(data=data)
print(df2)


In [None]:
# Same table once more, built record-wise: one dict per row, with the
# dict keys supplying the column names.
data = [
    dict(Name='Asabeneh', Country='Finland', City='Helsinki'),
    dict(Name='David', Country='UK', City='London'),
    dict(Name='John', Country='Sweden', City='Stockholm'),
]

df3 = pd.DataFrame(data=data)
print(df3)


In [None]:
# Load the sample Weight-Height dataset.
# A local copy is used when present. To download one manually, run:
# !curl -O https://raw.githubusercontent.com/Asabeneh/30-Days-Of-Python/master/data/weight-height.csv
WEIGHT_HEIGHT_FILE = 'weight-height.csv'
WEIGHT_HEIGHT_URL = 'https://raw.githubusercontent.com/Asabeneh/30-Days-Of-Python/master/data/weight-height.csv'

try:
    df = pd.read_csv(WEIGHT_HEIGHT_FILE)
except FileNotFoundError:
    # No local copy yet (the download command above is commented out):
    # read the CSV straight from its download URL so that the notebook
    # still runs top to bottom on a fresh checkout.
    df = pd.read_csv(WEIGHT_HEIGHT_URL)

# Display first few rows
print(df.head())


In [None]:
# Closing rows of the table
last_rows = df.tail()
print(last_rows)

# Dimensions as a (rows, columns) tuple
print("Shape:", df.shape)

# Column labels converted to a plain Python list
column_labels = df.columns.tolist()
print("Columns:", column_labels)

# Descriptive statistics for the numerical columns
summary_stats = df.describe()
print(summary_stats)


In [None]:
# Selecting a single column by its label returns a Series.
heights = df['Height']
weights = df['Weight']

# The label and the Series are passed as separate arguments joined by
# sep="\n". Ending the label with "\n" instead would make print() insert its
# default space separator in front of the first row and misalign the output.
print("Heights:", heights.head(), sep="\n")
print("\nWeights:", weights.head(), sep="\n")


In [None]:
# Descriptive statistics for each column. sep="\n" puts the table on its own
# line without the stray leading space that print()'s default separator
# would add in front of the first row.
print("Height Summary:", heights.describe(), sep="\n")
print("\nWeight Summary:", weights.describe(), sep="\n")


In [None]:
# Start over with a small hand-made DataFrame (this rebinds `df`).
field_names = ("Name", "Country", "City")
people_rows = [
    ("Asabeneh", "Finland", "Helsinki"),
    ("David", "UK", "London"),
    ("John", "Sweden", "Stockholm"),
]
# One dict per person, keyed by field name
data = [dict(zip(field_names, row)) for row in people_rows]

df = pd.DataFrame(data=data)
print(df)


In [None]:
# Attach two new columns; each list supplies one value per existing row.
new_columns = {
    'Weight': [74, 78, 69],
    'Height': [173, 175, 169],
}
for column_name, column_values in new_columns.items():
    df[column_name] = column_values
print(df)


In [None]:
# Rescale Height from centimetres to metres.
# NOTE: this overwrites the column, so running the cell a second time
# rescales the already-converted values again.
height_in_m = df['Height'].mul(0.01)
df['Height'] = height_in_m

# BMI = weight / height², computed element-wise per row
height_squared = height_in_m.pow(2)
df['BMI'] = df['Weight'].div(height_squared)

print(df)


In [None]:
# Keep one decimal place of the BMI values
rounded_bmi = df['BMI'].round(decimals=1)
df['BMI'] = rounded_bmi
print(df)


In [None]:
# One birth year per row; the single scalar for the current year is
# repeated for every row.
birth_years = [1769, 1985, 1990]
current_year = 2020
df['Birth Year'] = birth_years
df['Current Year'] = current_year
print(df)


In [None]:
# Make sure both year columns hold integers before doing arithmetic on them
for year_column in ('Birth Year', 'Current Year'):
    df[year_column] = df[year_column].astype(int)

# Age is the row-wise difference between the two year columns
df['Age'] = df['Current Year'].sub(df['Birth Year'])
print(df)


In [None]:
# Ages above this threshold are treated as data-entry errors.
MAX_REALISTIC_AGE = 120

# Build the outlier mask once and reuse it, so the "outlier" and "valid"
# groups are exact complements (an age equal to the threshold counts as valid).
is_outlier = df['Age'] > MAX_REALISTIC_AGE

# Identify any unrealistic ages
old_people = df[is_outlier]
# sep="\n" puts the table on its own line without the stray leading space
# that print()'s default separator would add before the first row.
print("Outliers:", old_people, sep="\n")

# Replace unrealistic ages with the rounded mean of the realistic ones.
valid_mean = df.loc[~is_outlier, 'Age'].mean()
if is_outlier.any() and pd.notna(valid_mean):
    df.loc[is_outlier, 'Age'] = round(valid_mean)
# When there is no realistic age to average, the mean is NaN and cannot be
# rounded; in that case the ages are left untouched instead of raising.

print("\nAfter fixing ages:", df, sep="\n")


In [None]:
# Boolean indexing: keep only the rows whose Age is below 120.
young = df[df['Age'] < 120]
# sep="\n" puts each table on its own line without the stray leading space
# that print()'s default separator would add before the first row.
print("People under 120:", young, sep="\n")

# Keep only the rows whose BMI is above 25.
overweight = df[df['BMI'] > 25]
print("\nPeople with BMI > 25:", overweight, sep="\n")


In [None]:
# 1. Read hacker_news.csv (expected next to the notebook; this rebinds `df`).
# NOTE(review): the steps below assume the file has a 'title' column -- confirm.
df = pd.read_csv('hacker_news.csv')

# 2. First and last few rows
print(df.head())
print(df.tail())

# 3. Get the title column (a Series).
# For every labelled print below, sep="\n" puts the data on its own line
# without the stray leading space that print()'s default separator would add
# before the first row.
titles = df['title']
print("Title Column:", titles.head(), sep="\n")

# 4. Count rows and columns
print("Shape:", df.shape)

# 5. Rows whose title contains "python": case=False ignores letter case,
#    na=False makes missing titles count as non-matches.
python_titles = df[df['title'].str.contains('python', case=False, na=False)]
print("Python-related Titles:", python_titles.head(), sep="\n")

# 6. Rows whose title contains "javascript", matched the same way
js_titles = df[df['title'].str.contains('javascript', case=False, na=False)]
print("JavaScript-related Titles:", js_titles.head(), sep="\n")
