# Indexing

#### 4. Add leading zeros to a character column in a pandas series and make the length of the field 8 digits.

In [5]:
import pandas as pd
data = {'col1': ['apple', 'banana', 'cherry', 'date']}
df = pd.DataFrame(data)

df['padded'] = df['col1'].str.zfill(8)

df


Unnamed: 0,col1,padded
0,apple,000apple
1,banana,00banana
2,cherry,00cherry
3,date,0000date


# String and Regular Expression

In [2]:
import pandas as pd

data = {'Name': ['Alice', 'Bob', 'Charlie', 'David']}
df = pd.DataFrame(data)

df['Upper'] = df['Name'].str.upper()
df['Lower'] = df['Name'].str.lower()
df['Length'] = df['Name'].str.len()

print(df)


      Name    Upper    Lower  Length
0    Alice    ALICE    alice       5
1      Bob      BOB      bob       3
2  Charlie  CHARLIE  charlie       7
3    David    DAVID    david       5


#### 16. Get the Length of the Integer of a Column

In [11]:
import pandas as pd

df = pd.DataFrame({'Integer': [23, 567, 890, 1234]})

df['Integer_Length'] = df['Integer'].apply(lambda x: len(str(x)))

print(df)


   Integer  Integer_Length
0       23               2
1      567               3
2      890               3
3     1234               4


#### 24. Extract Email from a Specified Column

In [17]:
import pandas as pd

df = pd.DataFrame({'Text': ['a@example.com', 'b@sample.com', 'not_a_valid_email', 'c@mail.com']})

df['Email'] = df['Text'].str.extract(r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})')

print(df)


                Text          Email
0      a@example.com  a@example.com
1       b@sample.com   b@sample.com
2  not_a_valid_email            NaN
3         c@mail.com     c@mail.com


#### 26. Extract Word Mentioning Someone in Tweets Using @

In [15]:
import pandas as pd

df = pd.DataFrame({'Text': ['@a Hello!', '@b, how are you?', 'Just a random text', '@c @d are here!', '@e is joining us.']})

df['Mentions'] = df['Text'].str.findall(r'@\w+')

print(df)


                 Text  Mentions
0           @a Hello!      [@a]
1    @b, how are you?      [@b]
2  Just a random text        []
3     @c @d are here!  [@c, @d]
4   @e is joining us.      [@e]


# Joining and merging

#### 15. Combine two DataFrame objects by filling null values in one DataFrame with non-null values from other DataFrame.

In [25]:
import pandas as pd
import numpy as np

df1 = pd.DataFrame({'A': [np.nan, 0.0, np.nan], 'B': [3, 4, 5]})
df2 = pd.DataFrame({'A': [1, 1, 3], 'B': [3.0, np.nan, 3.0]})

result = df1.combine_first(df2)

print(result)


     A    B
0  1.0  3.0
1  0.0  4.0
2  3.0  5.0


# Pandas Grouping and Aggregating 

#### 32. split a given dataset using group by on multiple columns and drop last n rows of from each group.

In [35]:
import pandas as pd

data = {
    'ord_no': [70001, 70009, 70002, 70004, 70007, 70005, 70008, 70010, 70003, 70012, 70011, 70013],
    'purch_amt': [150.50, 270.65, 65.26, 110.50, 948.50, 2400.60, 5760.00, 1983.43, 2480.40, 250.45, 75.29, 3045.60],
    'ord_date': ['2012-10-05', '2012-09-10', '2012-10-05', '2012-08-17', '2012-09-10', '2012-07-27', '2012-09-10', '2012-10-10', '2012-10-10', '2012-06-27', '2012-08-17', '2012-04-25'],
    'customer_id': [3002, 3001, 3001, 3003, 3002, 3002, 3001, 3004, 3003, 3002, 3003, 3001],
    'salesman_id': [5002, 5003, 5001, 5003, 5002, 5001, 5001, 5003, 5003, 5002, 5003, 5001]
}

df = pd.DataFrame(data)

grouped = df.groupby(['customer_id', 'salesman_id'])

result = grouped.apply(lambda group: group.iloc[:-2])

result = result.reset_index(drop=True)

print(result)


   ord_no  purch_amt    ord_date  customer_id  salesman_id
0   70002      65.26  2012-10-05         3001         5001
1   70001     150.50  2012-10-05         3002         5002
2   70004     110.50  2012-08-17         3003         5003


  result = grouped.apply(lambda group: group.iloc[:-2])


# Time Series

#### 32. create a yearly time period from a specified year and display the properties of this period.

In [37]:
import pandas as pd

year = 2025
period = pd.Period(year=year, freq='Y')

print("Period:", period)
print("Start Time:", period.start_time)
print("End Time:", period.end_time)
print("Frequency:", period.freq)
print("Year:", period.year)


Period: 2025
Start Time: 2025-01-01 00:00:00
End Time: 2025-12-31 23:59:59.999999999
Frequency: <YearEnd: month=12>
Year: 2025


# Pandas Filter 

#### 26. Convert 'Display Value' to Integer Type

In [40]:
import pandas as pd

data = {
    "Year": [1986, 1986, 1985, 1986, 1987],
    "WHO region": ["Western Pacific", "Americas", "Africa", "Americas", "Americas"],
    "Country": ["Viet Nam", "Uruguay", "Cte d'Ivoire", "Colombia", "Saint Kitts and Nevis"],
    "Beverage Types": ["Wine", "Other", "Wine", "Beer", "Beer"],
    "Display Value": [0.00, 0.50, 1.62, 4.27, 1.98]
}
df = pd.DataFrame(data)



In [42]:
df['Display Value'] = df['Display Value'].round().astype(int)
print("Display Value after conversion to integer type:")
print(df)

Display Value after conversion to integer type:
   Year       WHO region                Country Beverage Types  Display Value
0  1986  Western Pacific               Viet Nam           Wine              0
1  1986         Americas                Uruguay          Other              0
2  1985           Africa           Cte d'Ivoire           Wine              2
3  1986         Americas               Colombia           Beer              4
4  1987         Americas  Saint Kitts and Nevis           Beer              2
