## More String Manipulation in Pandas

In [2]:
import pandas as pd

In [3]:
# Toy dataset for the string-method examples: one display name and one
# e-mail address per person.
data = dict(
    Name=['Monal K', 'Bhuvika', 'Niranjan', 'riyan'],
    Email=['m@gmail.com', 'b@gmail.com', 'n@gmail.com', 'r@gmail.com'],
)

# Each dict key becomes a column; list positions become the default row index.
df = pd.DataFrame.from_dict(data)

In [4]:
# Preview the frame; head() returns the first five rows by default.
df.head()

Unnamed: 0,Name,Email
0,Monal K,m@gmail.com
1,Bhuvika,b@gmail.com
2,Niranjan,n@gmail.com
3,riyan,r@gmail.com


In [5]:
# regex=False treats 'K' as a literal substring, so every 'K' found in a
# name is replaced with 'Kumar'. The result is a new Series; df['Name']
# itself is not modified.
df['Name'].str.replace('K', 'Kumar', regex=False)

0    Monal Kumar
1        Bhuvika
2       Niranjan
3          riyan
Name: Name, dtype: object

- Padding

In [6]:
# side='left' puts the fill character in front of each name until the
# string is 20 characters wide, so the text ends up right-aligned.
df['Name'].str.pad(width=20, side='left', fillchar='-')

0    -------------Monal K
1    -------------Bhuvika
2    ------------Niranjan
3    ---------------riyan
Name: Name, dtype: object

In [7]:
# side='right' appends the fill character after each name until the
# string is 20 characters wide, so the text ends up left-aligned.
df['Name'].str.pad(width=20, side='right', fillchar='-')

0    Monal K-------------
1    Bhuvika-------------
2    Niranjan------------
3    riyan---------------
Name: Name, dtype: object

## Customization

- We can customize pandas' default display settings with `pd.set_option`.

In [8]:
# Limit DataFrame display to at most 5 rows; longer frames are truncated
# with a "..." row.
pd.set_option('display.max_rows', 5)

# Raw string (r'...'): the Windows path contains backslashes, and sequences
# such as \B, \P and \T are invalid escapes in a normal string literal, which
# makes Python emit a warning. The r-prefix keeps every backslash literal,
# so the path value is unchanged.
# NOTE(review): this is an absolute, machine-specific path; adjust it to
# wherever Titanic-Dataset.csv is stored.
df = pd.read_csv(r'D:\Btech_CS\Python\Pandas\Titanic-Dataset.csv')

df

  df = pd.read_csv('D:\Btech_CS\Python\Pandas\Titanic-Dataset.csv')


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
...,...,...,...,...,...,...,...,...,...,...,...,...
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q


In [9]:
# Render every float with exactly two decimal places. This affects display
# only; the stored values are not rounded.
pd.set_option('display.float_format', lambda value: format(value, '.2f'))

df.loc[:, ['Fare', 'Age']]

Unnamed: 0,Fare,Age
0,7.25,22.00
1,71.28,38.00
...,...,...
889,30.00,26.00
890,7.75,32.00


In [10]:
# Temporarily set an option with a context manager: display.max_rows is 3
# only inside the with-block and reverts to its previous value on exit.
# print() is needed here because a bare `df` inside the with-block would not
# be rendered as the cell's output.
with pd.option_context('display.max_rows', 3):
    print(df)

     PassengerId  Survived  Pclass                     Name   Sex   Age  \
0              1         0       3  Braund, Mr. Owen Harris  male 22.00   
..           ...       ...     ...                      ...   ...   ...   
890          891         0       3      Dooley, Mr. Patrick  male 32.00   

     SibSp  Parch     Ticket  Fare Cabin Embarked  
0        1      0  A/5 21171  7.25   NaN        S  
..     ...    ...        ...   ...   ...      ...  
890      0      0     370376  7.75   NaN        Q  

[891 rows x 12 columns]


In [11]:
# Reset a single option (display.max_rows) back to its pandas default.
pd.reset_option('display.max_rows')

In [None]:
# Reset every pandas option to its default in one call.
# NOTE(review): this call may emit FutureWarning messages (presumably for
# deprecated options; confirm on your pandas version).
pd.reset_option('all')

In [None]:
# Reset all options without FutureWarning noise: the filter below applies
# only inside the catch_warnings() block, and the previous warning filters
# are restored when the block exits.
import warnings


with warnings.catch_warnings():
    warnings.simplefilter('ignore', FutureWarning)
    pd.reset_option('all')

## 1. Categorical Data in Pandas
Definition:
- Categorical data refers to variables that can take on one of a limited, fixed number of possible values — essentially,  categories. These are often non-numeric labels used to classify data.
**Examples:**

- Gender: Male, Female, Other
- Class: First Class, Second Class, Third Class
- Grade: A, B, C, D

**Why use categorical data?**

- Improves memory efficiency.
- Enables faster computations.
- Helps Pandas understand that these are qualitative values, not quantitative.

In [15]:
# Reload the Titanic data for the categorical examples.
# Raw string (r'...') keeps the backslashes of the Windows path literal;
# in a normal string \B, \P and \T are invalid escape sequences and make
# Python emit a warning. The path value itself is unchanged.
# NOTE(review): absolute, machine-specific path; adjust to your local copy.
df = pd.read_csv(r'D:\Btech_CS\Python\Pandas\Titanic-Dataset.csv')

  df = pd.read_csv('D:\Btech_CS\Python\Pandas\Titanic-Dataset.csv')


In [16]:
# Preview the first five rows of the freshly loaded frame.
df.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [21]:
# Store a categorical copy of 'Sex' in a new column; the original object
# column is kept so the two can be compared side by side.
df['sex_category'] = df['Sex'].astype('category')

# The displayed values are identical; only the underlying dtype differs.
df[['Sex','sex_category']].head()

Unnamed: 0,Sex,sex_category
0,male,male
1,female,female
2,female,female
3,female,female
4,male,male


In [26]:
# Show the dtype of the original column and of its categorical copy,
# one per line.
print(df['Sex'].dtype, df['sex_category'].dtype, sep='\n')

object
category


- We do all this to convert the categorical data into numeric data, which is easier to compute with and visualise. To do that, we convert the categories into integer codes.

In [29]:
# .cat.codes gives the integer position of each value within the column's
# categories (missing values are encoded as -1).
df['Sex_code'] = df['sex_category'].cat.codes

In [33]:
# Compare the original labels, the categorical column and the derived codes.
df[['Sex','sex_category','Sex_code']].head()

# Codes follow the order of the categories, which astype('category') sorts
# for string labels: 'female' comes before 'male', so female = 0 and male = 1.

Unnamed: 0,Sex,sex_category,Sex_code
0,male,male,1
1,female,female,0
2,female,female,0
3,female,female,0
4,male,male,1


- We can also set the codes manually.

In [34]:
# Manual encoding: 1 where 'Sex' equals 'male', 0 for every other value
# (including missing values). The vectorised comparison replaces a per-row
# Python lambda and yields the same int64 column.
df['sex_mcodes'] = df['Sex'].eq('male').astype('int64')

df[['Sex' , 'sex_mcodes']].head()

Unnamed: 0,Sex,sex_mcodes
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1


In [35]:
# Preview the frame with the three added columns
# (sex_category, Sex_code, sex_mcodes).
df.head()


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,sex_category,Sex_code,sex_mcodes
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,male,1,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,female,0,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,female,0,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,female,0,0
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,male,1,1


## Date Functionality in Pandas
What It Is:
- Pandas provides powerful date and time functionalities that allow us to work with time-series data or perform operations based on dates easily.

**Why It's Useful:**

- Converts strings into proper datetime objects.
- Helps extract valuable time-based features (like day, month, weekday).
- Enables filtering, resampling, and time-based grouping of data.



In [39]:
# Sample events whose dates start out as plain ISO-formatted strings.
# 'date' keeps the raw text; 'date_n' holds the same values parsed into
# real datetimes by pd.to_datetime.
df = pd.DataFrame(
    {
        'Event': ['Concert', 'Conference', 'Wedding', 'Reception', 'last_day'],
        'date': ['2025-01-01', '2025-03-15', '2025-07-20', '2025-07-21', '2025-07-31'],
    }
).assign(date_n=lambda frame: pd.to_datetime(frame['date']))

In [40]:
# Preview the events frame: 'date' (text) and 'date_n' (datetime) look the
# same when displayed.
df.head()

Unnamed: 0,Event,date,date_n
0,Concert,2025-01-01,2025-01-01
1,Conference,2025-03-15,2025-03-15
2,Wedding,2025-07-20,2025-07-20
3,Reception,2025-07-21,2025-07-21
4,last_day,2025-07-31,2025-07-31


In [41]:
# Compare the dtypes of the raw text column, the parsed datetime column and
# the label column.
print(df[['date', 'date_n', 'Event']].dtypes)

date              object
date_n    datetime64[ns]
Event             object
dtype: object


In [43]:
# The .dt accessor exposes datetime components; .year gives the calendar
# year of each date as an integer.
df['date_n'].dt.year

0    2025
1    2025
2    2025
3    2025
4    2025
Name: date_n, dtype: int32

In [45]:
# Month number of each date (1 = January ... 12 = December).
df['date_n'].dt.month

0    1
1    3
2    7
3    7
4    7
Name: date_n, dtype: int32

In [46]:
# Day of month, weekday number (Monday = 0) and weekday name of each date.
# NOTE: a notebook cell renders only its last expression, so only the
# day_name() result is shown as output; the first two values are computed
# and discarded.
df['date_n'].dt.day

df['date_n'].dt.weekday

df['date_n'].dt.day_name()

0    Wednesday
1     Saturday
2       Sunday
3       Monday
4     Thursday
Name: date_n, dtype: object

In [47]:
# Keep only the events dated strictly after 1 July 2025.
df.loc[df['date_n'] > pd.Timestamp('2025-07-01')]

Unnamed: 0,Event,date,date_n
2,Wedding,2025-07-20,2025-07-20
3,Reception,2025-07-21,2025-07-21
4,last_day,2025-07-31,2025-07-31


In [50]:
# Keep events inside the date range; between() includes both end points by
# default, so an event dated exactly 2025-01-01 is kept.
df[df['date_n'].between('2025-01-01', '2025-06-01')]

Unnamed: 0,Event,date,date_n
0,Concert,2025-01-01,2025-01-01
1,Conference,2025-03-15,2025-03-15


## Date Arithmetic

In [52]:
# Adding a Timedelta shifts every date by the same amount: 'date_nw' is
# each event date moved one week (7 days) later.
df['date_nw'] = df['date_n'] + pd.Timedelta(days=7)
df.head()

Unnamed: 0,Event,date,date_n,date_nw
0,Concert,2025-01-01,2025-01-01,2025-01-08
1,Conference,2025-03-15,2025-03-15,2025-03-22
2,Wedding,2025-07-20,2025-07-20,2025-07-27
3,Reception,2025-07-21,2025-07-21,2025-07-28
4,last_day,2025-07-31,2025-07-31,2025-08-07


In [56]:
# Time left until each event, measured from the moment this cell runs.
# Timestamp.today() includes the current time of day, so the values carry an
# hours/minutes/seconds part and change on every run; past events come out
# negative.
df['date_timeremain'] = df['date_n'].sub(pd.Timestamp.today())

df.head()

Unnamed: 0,Event,date,date_n,date_nw,date_timeremain
0,Concert,2025-01-01,2025-01-01,2025-01-08,-168 days +01:54:08.908864
1,Conference,2025-03-15,2025-03-15,2025-03-22,-95 days +01:54:08.908864
2,Wedding,2025-07-20,2025-07-20,2025-07-27,32 days 01:54:08.908864
3,Reception,2025-07-21,2025-07-21,2025-07-28,33 days 01:54:08.908864
4,last_day,2025-07-31,2025-07-31,2025-08-07,43 days 01:54:08.908864
