### Working with pandas

In [1]:
import pandas as pd
import numpy as np

### Series

In [2]:
# A Series is a labelled 1-D array; the np.nan entry stands for a missing
# value and makes pandas store the whole Series as float64.
s = pd.Series(
    data=[1, 3, 5, np.nan],
    index=list('abcd'),
)
print(s)

a    1.0
b    3.0
c    5.0
d    NaN
dtype: float64


In [3]:
# Build a Series from a plain list and attach custom labels to it.
data = [10, 20, 30]
series = pd.Series(data=data, index=list('abc'))
print(series)

a    10
b    20
c    30
dtype: int64


In [4]:
# Same Series as the previous cell, built from a dict instead of a list:
# the dict keys become the index labels and the values become the data,
# so no separate `index=` argument is needed.
series = pd.Series({'a': 10, 'b': 20, 'c': 30})
print(series)

a    10
b    20
c    30
dtype: int64


#### DataFrame

In [5]:
# Build the frame one record (row) at a time; the dict keys shared by the
# records become the column names, in the order they first appear.
df = pd.DataFrame([
    {'Name': 'Alice', 'Age': 25},
    {'Name': 'Bob', 'Age': 30},
    {'Name': 'Charlie', 'Age': 35},
])
print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [6]:
# Small patient table with one missing blood-pressure reading.
# The None in BP is stored as NaN, which is why that column prints as floats.
patients = pd.DataFrame(dict(
    Age=[45, 32, 67],
    BP=[120, None, 140],
    Diagnosis=['Healthy', 'Diabetes', 'Hypertension'],
))
print(patients)

   Age     BP     Diagnosis
0   45  120.0       Healthy
1   32    NaN      Diabetes
2   67  140.0  Hypertension


#### Reading data from a file
```python
# Each reader below returns a DataFrame unless noted otherwise.
# The file names are placeholders; point them at real files before running.

# CSV
df = pd.read_csv('data.csv')

# Excel (a single named sheet)
df = pd.read_excel('data.xlsx', sheet_name='Sheet1')

# JSON
df = pd.read_json('data.json')

# TSV (tab-separated): same reader as CSV, with the separator overridden
df = pd.read_csv('data.tsv', sep='\t')

# Hospital EHR data exported as CSV; AdmissionDate is parsed into datetimes.
# NOTE(review): this was labelled "HL7 format", but read_csv only reads
# delimited text, so confirm what the export actually contains.
ehr = pd.read_csv('ehr_export.csv', parse_dates=['AdmissionDate'])

# IoT sensor data stored as JSON Lines (one JSON object per line)
sensors = pd.read_json('iot_readings.json', lines=True)

# Financial data (Excel with multiple sheets): passing a list of sheet names
# returns a dict of DataFrames keyed by sheet name, not a single DataFrame.
balance_sheets = pd.read_excel('q3_financials.xlsx', sheet_name=['Assets', 'Liabilities'])
```

## Operating on data 

In [7]:

# Create the cities DataFrame used by the rest of this section.
# Each tuple is one row (city, state, population), which keeps the three
# values for a city side by side instead of spread across parallel lists.
df = pd.DataFrame(
    [
        ('Lagos', 'Lagos', 21000000),
        ('Abuja', 'FCT', 3600000),
        ('Kano', 'Kano', 3200000),
        ('Ibadan', 'Oyo', 3100000),
        ('Benin City', 'Edo', 1500000),
        ('Port Harcourt', 'Rivers', 2300000),
        ('Jos', 'Plateau', 900000),
        ('Kaduna', 'Kaduna', 1600000),
        ('Enugu', 'Enugu', 1200000),
        ('Maiduguri', 'Borno', 1300000),
        ('Zaria', 'Kaduna', 1350000),
        ('Aba', 'Abia', 1000000),
        ('Ilorin', 'Kwara', 850000),
        ('Abeokuta', 'Ogun', 1000000),
        ('Onitsha', 'Anambra', 1100000),
        ('Warri', 'Delta', 500000),
        ('Oshogbo', 'Osun', 730000),
        ('Calabar', 'Cross River', 480000),
        ('Uyo', 'Akwa Ibom', 600000),
        ('Makurdi', 'Benue', 400000),
        ('Owerri', 'Imo', 750000),
        ('Akure', 'Ondo', 550000),
        ('Bauchi', 'Bauchi', 690000),
        ('Sokoto', 'Sokoto', 720000),
        ('Minna', 'Niger', 450000),
        ('Lokoja', 'Kogi', 430000),
        ('Yola', 'Adamawa', 390000),
        ('Gombe', 'Gombe', 360000),
        ('Awka', 'Anambra', 300000),
        ('Ado Ekiti', 'Ekiti', 250000),
        ('Abakaliki', 'Ebonyi', 280000),
        ('Katsina', 'Katsina', 340000),
        ('Jalingo', 'Taraba', 310000),
        ('Damaturu', 'Yobe', 270000),
        ('Gusau', 'Zamfara', 290000),
    ],
    columns=['City', 'State', 'Population'],
)

print(df)


             City        State  Population
0           Lagos        Lagos    21000000
1           Abuja          FCT     3600000
2            Kano         Kano     3200000
3          Ibadan          Oyo     3100000
4      Benin City          Edo     1500000
5   Port Harcourt       Rivers     2300000
6             Jos      Plateau      900000
7          Kaduna       Kaduna     1600000
8           Enugu        Enugu     1200000
9       Maiduguri        Borno     1300000
10          Zaria       Kaduna     1350000
11            Aba         Abia     1000000
12         Ilorin        Kwara      850000
13       Abeokuta         Ogun     1000000
14        Onitsha      Anambra     1100000
15          Warri        Delta      500000
16        Oshogbo         Osun      730000
17        Calabar  Cross River      480000
18            Uyo    Akwa Ibom      600000
19        Makurdi        Benue      400000
20         Owerri          Imo      750000
21          Akure         Ondo      550000
22         

#### Filtering

In [16]:
# Keep only the rows whose Population is below 4,000,000.
# query() evaluates the expression against the column names and returns
# the matching rows with their original index labels.
filter_df = df.query('Population < 4000000')
print(filter_df)

             City        State  Population
1           Abuja          FCT     3600000
2            Kano         Kano     3200000
3          Ibadan          Oyo     3100000
4      Benin City          Edo     1500000
5   Port Harcourt       Rivers     2300000
6             Jos      Plateau      900000
7          Kaduna       Kaduna     1600000
8           Enugu        Enugu     1200000
9       Maiduguri        Borno     1300000
10          Zaria       Kaduna     1350000
11            Aba         Abia     1000000
12         Ilorin        Kwara      850000
13       Abeokuta         Ogun     1000000
14        Onitsha      Anambra     1100000
15          Warri        Delta      500000
16        Oshogbo         Osun      730000
17        Calabar  Cross River      480000
18            Uyo    Akwa Ibom      600000
19        Makurdi        Benue      400000
20         Owerri          Imo      750000
21          Akure         Ondo      550000
22         Bauchi       Bauchi      690000
23         

In [17]:
# Multiple conditions: build one boolean mask per condition, then combine
# them element-wise with & (both must be True for a row to be kept).
# NOTE(review): every Population value in this frame is far above 25, so the
# first mask is all True and only the 'K' prefix test narrows the result.
# Confirm the intended threshold.
above_threshold = df['Population'] > 25
starts_with_k = df['City'].str.startswith('K')
filter_df2 = df[above_threshold & starts_with_k]
print(filter_df2)


       City    State  Population
2      Kano     Kano     3200000
7    Kaduna   Kaduna     1600000
31  Katsina  Katsina      340000


#### Sorting

In [10]:
df.sort_values(by='Population', ascending=False)  # largest population first; returns a new frame, df is unchanged


Unnamed: 0,City,State,Population
0,Lagos,Lagos,21000000
1,Abuja,FCT,3600000
2,Kano,Kano,3200000
3,Ibadan,Oyo,3100000
5,Port Harcourt,Rivers,2300000
7,Kaduna,Kaduna,1600000
4,Benin City,Edo,1500000
10,Zaria,Kaduna,1350000
9,Maiduguri,Borno,1300000
8,Enugu,Enugu,1200000


In [11]:
df.sort_values(by='Population', ascending=True)  # smallest population first


Unnamed: 0,City,State,Population
29,Ado Ekiti,Ekiti,250000
33,Damaturu,Yobe,270000
30,Abakaliki,Ebonyi,280000
34,Gusau,Zamfara,290000
28,Awka,Anambra,300000
32,Jalingo,Taraba,310000
31,Katsina,Katsina,340000
27,Gombe,Gombe,360000
26,Yola,Adamawa,390000
19,Makurdi,Benue,400000


In [12]:
df.sort_values(by='City', ascending=False)  # city names in reverse alphabetical order (Z to A)


Unnamed: 0,City,State,Population
10,Zaria,Kaduna,1350000
26,Yola,Adamawa,390000
15,Warri,Delta,500000
18,Uyo,Akwa Ibom,600000
23,Sokoto,Sokoto,720000
5,Port Harcourt,Rivers,2300000
20,Owerri,Imo,750000
16,Oshogbo,Osun,730000
14,Onitsha,Anambra,1100000
24,Minna,Niger,450000


In [13]:
df.sort_values(by='City', ascending=True)  # city names in alphabetical order (A to Z)


Unnamed: 0,City,State,Population
11,Aba,Abia,1000000
30,Abakaliki,Ebonyi,280000
13,Abeokuta,Ogun,1000000
1,Abuja,FCT,3600000
29,Ado Ekiti,Ekiti,250000
21,Akure,Ondo,550000
28,Awka,Anambra,300000
22,Bauchi,Bauchi,690000
4,Benin City,Edo,1500000
17,Calabar,Cross River,480000


#### Selecting columns

In [14]:
df['City']          # A single column label in brackets returns that column as a Series


0             Lagos
1             Abuja
2              Kano
3            Ibadan
4        Benin City
5     Port Harcourt
6               Jos
7            Kaduna
8             Enugu
9         Maiduguri
10            Zaria
11              Aba
12           Ilorin
13         Abeokuta
14          Onitsha
15            Warri
16          Oshogbo
17          Calabar
18              Uyo
19          Makurdi
20           Owerri
21            Akure
22           Bauchi
23           Sokoto
24            Minna
25           Lokoja
26             Yola
27            Gombe
28             Awka
29        Ado Ekiti
30        Abakaliki
31          Katsina
32          Jalingo
33         Damaturu
34            Gusau
Name: City, dtype: object

In [15]:
df[['City', 'Population']] # A list of labels returns a DataFrame with just those columns


Unnamed: 0,City,Population
0,Lagos,21000000
1,Abuja,3600000
2,Kano,3200000
3,Ibadan,3100000
4,Benin City,1500000
5,Port Harcourt,2300000
6,Jos,900000
7,Kaduna,1600000
8,Enugu,1200000
9,Maiduguri,1300000


#### Exploring data

In [18]:
df.head()        # First 5 rows (the count used when none is passed)

Unnamed: 0,City,State,Population
0,Lagos,Lagos,21000000
1,Abuja,FCT,3600000
2,Kano,Kano,3200000
3,Ibadan,Oyo,3100000
4,Benin City,Edo,1500000


In [19]:
df.tail(3)       # Last 3 rows (count passed explicitly)

Unnamed: 0,City,State,Population
32,Jalingo,Taraba,310000
33,Damaturu,Yobe,270000
34,Gusau,Zamfara,290000


In [20]:
df.shape         # (number of rows, number of columns) as a tuple

(35, 3)

In [21]:
df.info()        # Prints each column's dtype and non-null count, plus memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   City        35 non-null     object
 1   State       35 non-null     object
 2   Population  35 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 968.0+ bytes


In [22]:
df.describe()    # count, mean, std, min, quartiles and max for the numeric columns

Unnamed: 0,Population
count,35.0
mean,1545429.0
std,3492320.0
min,250000.0
25%,395000.0
50%,720000.0
75%,1250000.0
max,21000000.0


In [23]:
df.columns       # Index object holding the column labels

Index(['City', 'State', 'Population'], dtype='object')

In [24]:
df.index         # Row labels; a RangeIndex here because no custom index was set

RangeIndex(start=0, stop=35, step=1)