### Create Series & DataFrame

In [3]:
import pandas as pd
import numpy as np
# series: 1 column
# dataframe:multiple column
# 1. CREATE SERIES & DATAFRAME
print("1. CREATE SERIES & DATAFRAME")

# Series
s = pd.Series([3, -5, 7, 4])
# print("\nSeries s:\n")

print(s)

# DataFrame
data = {
    "Country": [
        "Belgium", "India", "Brazil", "Somalia", "Kenya",
        "Ethiopia", "USA", "UK", "Canada", "Germany",
        "France", "China", "Japan", "Saudi Arabia", "South Africa"
    ],
    "Capital": [
        "Brussels", "New Delhi", "Brasília", "Mogadishu", "Nairobi",
        "Addis Ababa", "Washington", "London", "Ottawa", "Berlin",
        "Paris", "Beijing", "Tokyo", "Riyadh", "Pretoria"
    ],
    "Population": [
        11_190_846, 1_303_171_035, 207_847_528, 16_000_000, 55_000_000,
        120_000_000, 331_000_000, 67_000_000, 38_000_000, 83_000_000,
        65_000_000, 1_402_000_000, 125_000_000, 35_000_000, 59_000_000
    ]
}

data_frame = pd.DataFrame(data)
print("\nDataFrame df:\n", data_frame)


1. CREATE SERIES & DATAFRAME
0    3
1   -5
2    7
3    4
dtype: int64

DataFrame df:
          Country      Capital  Population
0        Belgium     Brussels    11190846
1          India    New Delhi  1303171035
2         Brazil     Brasília   207847528
3        Somalia    Mogadishu    16000000
4          Kenya      Nairobi    55000000
5       Ethiopia  Addis Ababa   120000000
6            USA   Washington   331000000
7             UK       London    67000000
8         Canada       Ottawa    38000000
9        Germany       Berlin    83000000
10        France        Paris    65000000
11         China      Beijing  1402000000
12         Japan        Tokyo   125000000
13  Saudi Arabia       Riyadh    35000000
14  South Africa     Pretoria    59000000


### Series with default Column Index

In [5]:
import pandas as pd

# Create a Series
ages = pd.Series([20, 22, 25, 30, 28,30]) #default index

# Convert to DataFrame
df = pd.DataFrame(ages)

df

Unnamed: 0,0
0,20
1,22
2,25
3,30
4,28
5,30


### Custom Index

In [7]:
import pandas as pd

s = pd.Series([10, 20, 30, 40], index=["a", "b", "c", "d"])
print(s)


a    10
b    20
c    30
d    40
dtype: int64


### Custome Column

In [9]:
ages = pd.Series([20, 22, 25, 30, 28], name="Age")

df = pd.DataFrame(ages)

df


Unnamed: 0,Age
0,20
1,22
2,25
3,30
4,28


In [10]:
### 3. Convert Series → DataFrame using a dictionary

In [74]:
ages = pd.Series([20, 22, 25, 30, 28])

df = pd.DataFrame({"Age": ages})

ages

0    20
1    22
2    25
3    30
4    28
dtype: int64

### Convert multiple Series → DataFrame

In [13]:
names = pd.Series(["Ali", "Asha", "John"])
ages  = pd.Series([20, 22, 25])

df = pd.DataFrame({
    "Name": names,
    "Age": ages,
}
)

df


Unnamed: 0,Name,Age
0,Ali,20
1,Asha,22
2,John,25


In [14]:
import pandas as pd

names = pd.Series(["Ali", "Asha", "John"])
ages  = pd.Series([20, 22, 25])

df1 = pd.DataFrame(
    {
        "Name": names,
        "Age": ages
    })
df1.index = ["P1", "P2", "P3"]



print(df1)

#     "Age": [20, 22, 25]

    Name  Age
P1   Ali   20
P2  Asha   22
P3  John   25


#### a new DataFrame that uses the column names AND the index of another existing DataFrame

In [16]:
import pandas as pd

df2 = pd.DataFrame({
    "Name": ["Hassan", "Ahmed", "Mohamed"],
    "Age": [20, 22, 25]
})

df2


Unnamed: 0,Name,Age
0,Hassan,20
1,Ahmed,22
2,Mohamed,25


In [17]:
df2 = pd.DataFrame(df1,columns=df1.columns, index=df1.index)

print(df2)

    Name  Age
P1   Ali   20
P2  Asha   22
P3  John   25


### Read/Write CSV & Excel

In [19]:
print("2. READ / WRITE EXCEL & CSV")

# Write CSV
df.to_csv("test.csv", index=False)
print("Saved test.csv")

# Read CSV
df_csv = pd.read_csv("test.csv")
print("\nRead from CSV:\n", df_csv)




2. READ / WRITE EXCEL & CSV
Saved test.csv

Read from CSV:
    Name  Age
0   Ali   20
1  Asha   22
2  John   25


In [20]:
# Write Excel
df.to_excel("test.xlsx", sheet_name="Sheet1", index=False)
print("Saved test.xlsx")

# Read Excel
df_xl = pd.read_excel("test.xlsx")
print("\nRead from Excel:\n", df_xl)

Saved test.xlsx

Read from Excel:
    Name  Age
0   Ali   20
1  Asha   22
2  John   25


### Boolean Filtering

In [22]:
print("4. BOOLEAN FILTERING")

print("\nCountries with population > 200 million:\n",
data_frame[data_frame['Population'] > 200_000_000])

4. BOOLEAN FILTERING

Countries with population > 200 million:
    Country     Capital  Population
1    India   New Delhi  1303171035
2   Brazil    Brasília   207847528
6      USA  Washington   331000000
11   China     Beijing  1402000000


### Dropping Rows & Columns

In [24]:
print("5. DROPPING ROWS & COLUMNS")

print("\nDrop row with index 1:\n", df.drop(1, axis=0))
# print("\nDrop 'Capital' column:\n", df.drop('Capital', axis=1))

5. DROPPING ROWS & COLUMNS

Drop row with index 1:
    Name  Age
0   Ali   20
2  John   25


In [25]:
print("RESET INDEX EXAMPLE")

df2 = df.drop(1).reset_index(drop=True)
print("\nReset index after dropping row:\n", df2)


RESET INDEX EXAMPLE

Reset index after dropping row:
    Name  Age
0   Ali   20
1  John   25


### DataFrame Information

In [27]:
print("6. BASIC DATAFRAME INFO")

print("\ndf.shape =", df.shape)
print("\ndf.columns =", df.columns)
print("\ndf.info():")
print(df.info())


6. BASIC DATAFRAME INFO

df.shape = (3, 2)

df.columns = Index(['Name', 'Age'], dtype='object')

df.info():
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 180.0+ bytes
None


In [28]:
data_frame.describe()

Unnamed: 0,Population
count,15.0
mean,261214000.0
std,451055400.0
min,11190850.0
25%,46500000.0
50%,67000000.0
75%,166423800.0
max,1402000000.0


In [29]:
import numpy as np
numbers = [5,7,9,12,15,18,20,22,25,30]


print("Q1 =", np.percentile(numbers, 25))
print("Q3 =", np.percentile(numbers, 75))
print("Q2 =", np.percentile(numbers, 50))

Q1 = 9.75
Q3 = 21.5
Q2 = 16.5


In [30]:
numberss= sum(numbers)
numberss

163

### Inspecting Data

In [32]:
# df.head() # dsiplay first 5
data_frame.tail() # display last 5

# data_frame.shape, df.info()

# data_frame.describe()

Unnamed: 0,Country,Capital,Population
10,France,Paris,65000000
11,China,Beijing,1402000000
12,Japan,Tokyo,125000000
13,Saudi Arabia,Riyadh,35000000
14,South Africa,Pretoria,59000000


In [33]:
 data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Country     15 non-null     object
 1   Capital     15 non-null     object
 2   Population  15 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 492.0+ bytes


## Handling Missing Values

In [35]:

data_frame.isnull().sum() # finding missing values

data_frame.fillna(0) # fill with 0

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528
3,Somalia,Mogadishu,16000000
4,Kenya,Nairobi,55000000
5,Ethiopia,Addis Ababa,120000000
6,USA,Washington,331000000
7,UK,London,67000000
8,Canada,Ottawa,38000000
9,Germany,Berlin,83000000


In [36]:
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'name': ['Ali', 'Amina', 'Omar', 'Lulu', 'Khalid'],
    'age': [25,30,np.nan, 22, np.nan],
    'salary': [50000, 60000, np.nan, 45000, np.nan],
    'city': ['Mogadishu', 'Hargeisa', np.nan, 'Badoa', 'Kismayo']
})

print(df)


     name   age   salary       city
0     Ali  25.0  50000.0  Mogadishu
1   Amina  30.0  60000.0   Hargeisa
2    Omar   NaN      NaN        NaN
3    Lulu  22.0  45000.0      Badoa
4  Khalid   NaN      NaN    Kismayo


In [37]:
# Drop_city=df.drop('city', axis=1)
# Drop_city

In [38]:
# df['age'] = df['age'].fillna(df['age'].mean())
# df

In [39]:
# df['city'] = df['city'].fillna(df['city'].mode()[0])

# df['city'] = df['city'].fillna("Unknown")
df

Unnamed: 0,name,age,salary,city
0,Ali,25.0,50000.0,Mogadishu
1,Amina,30.0,60000.0,Hargeisa
2,Omar,,,
3,Lulu,22.0,45000.0,Badoa
4,Khalid,,,Kismayo


### Forward fill

In [41]:
df.fillna(method='ffill')

  df.fillna(method='ffill')


Unnamed: 0,name,age,salary,city
0,Ali,25.0,50000.0,Mogadishu
1,Amina,30.0,60000.0,Hargeisa
2,Omar,30.0,60000.0,Hargeisa
3,Lulu,22.0,45000.0,Badoa
4,Khalid,22.0,45000.0,Kismayo


### Backward fill

In [43]:
# df.fillna(method='bfill')


In [76]:
# Writing
with open("sample.txt", "w") as f:
    f.write("Hello ML")

# Reading
with open("sample.txt", "r") as f:
    print(f.read())

Hello ML


In [None]:
1.: GGGJ



In [None]:
1. TTY
