---

Create Indexes in Pandas

---

<h3>Default Index</h3>

In [3]:
import pandas as pd

# Column-oriented input: each key is a column name, each list holds that column's values.
data = dict(
    Name=['Amit', 'Ankit', 'Ayush'],
    Age=[25, 28, 32],
    City=['Indore', 'Ghazipur', 'Pune'],
)

# No index is passed, so pandas labels the rows with its default 0, 1, 2 index.
df = pd.DataFrame(data)
print(df)

    Name  Age      City
0   Amit   25    Indore
1  Ankit   28  Ghazipur
2  Ayush   32      Pune


<h3>Setting Index</h3>

In [4]:
import pandas as pd

data = {
    'Name': ['Amit', 'Ankit', 'Ayush'],
    'Age': [25, 28, 32],
    'City': ['Indore', 'Ghazipur', 'Pune'],
}
df = pd.DataFrame(data)

# Promote the 'Name' column to the row index. With inplace=True the call
# mutates df itself and returns None, so there is nothing to assign.
df.set_index(keys='Name', inplace=True)
print(df)

       Age      City
Name                
Amit    25    Indore
Ankit   28  Ghazipur
Ayush   32      Pune


In [5]:
import pandas as pd

data = {
    'Name': ['Amit', 'Ankit', 'Ayush'],
    'Age': [25, 28, 32],
    'City': ['Indore', 'Ghazipur', 'Pune'],
}

# set_index returns a new frame whose row labels come from 'Name'; binding
# that result to df leaves df in the same state as set_index(..., inplace=True).
df = pd.DataFrame(data).set_index('Name')
print(df)

       Age      City
Name                
Amit    25    Indore
Ankit   28  Ghazipur
Ayush   32      Pune


<h3>1. inplace = True </h3>

In [7]:
import pandas as pd

# Build a small two-column frame to experiment on
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
print(df)

# Remove column 'B' in place: drop() mutates df and returns None
df.drop(columns='B', inplace=True)

# df itself has changed -- only column 'A' is left
print(df)


   A  B
0  1  5
1  2  6
2  3  7
3  4  8
   A
0  1
1  2
2  3
3  4


<h3>2. inplace = False </h3>

In [9]:
import pandas as pd

# Two-column frame used to show the non-mutating behaviour
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})

# inplace defaults to False, so drop() hands back a modified copy
new_df = df.drop(columns=['B'])

# df keeps both columns; only new_df reflects the drop
print(df)       # still has 'A' and 'B'
print(new_df)   # has 'A' only



   A  B
0  1  5
1  2  6
2  3  7
3  4  8
   A
0  1
1  2
2  3
3  4


---

Common pandas methods that accept the `inplace` parameter:

drop(), sort_values(), fillna(), rename(), set_index()

---

In [None]:
import pandas as pd
import numpy as np

# Shared sample data. The two NaN entries in 'math' give fillna something to
# replace. Each pd.DataFrame(student_data) call below builds an independent frame.
student_data = {
    'id': [1, 2, 3, 4],
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'math': [90, np.nan, 85, np.nan],
    'age': [25, 30, 35, 40],
}
df = pd.DataFrame(student_data)

print("Original DataFrame:")
print(df)

# ---- Example 1: fillna ----
# Default (inplace=False): the filled frame is returned, df keeps its NaNs
df_filled = df.fillna(value={'math': 0})
print("\nAfter fillna without inplace (new DataFrame):")
print(df_filled)
print("Original unchanged:")
print(df)

# inplace=True: df itself is filled and the call returns None
df.fillna(value={'math': 0}, inplace=True)
print("\nAfter fillna with inplace (original modified):")
print(df)

# Start again from unfilled data for the following examples
df = pd.DataFrame(student_data)

# ---- Example 2: set_index ----
# Default: a re-indexed frame is returned, df keeps 'id' as a column
df_indexed = df.set_index(keys='id')
print("\nAfter set_index without inplace (new DataFrame):")
print(df_indexed)
print("Original unchanged:")
print(df)

# inplace=True: 'id' becomes the index of df itself
df.set_index(keys='id', inplace=True)
print("\nAfter set_index with inplace (original modified):")
print(df)

# Move 'id' back into a regular column before the rename example
df.reset_index(inplace=True)

# ---- Example 3: rename ----
# Default: the renamed frame is returned, df keeps the 'age' label
df_renamed = df.rename({'age': 'AgeYears'}, axis='columns')
print("\nAfter rename without inplace (new DataFrame):")
print(df_renamed)
print("Original unchanged:")
print(df)

# inplace=True: the column label changes on df itself
df.rename({'age': 'AgeYears'}, axis='columns', inplace=True)
print("\nAfter rename with inplace (original modified):")
print(df)

# ---- sort_values: order the rows by the values of a column ----
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'score': [88, 92, 85, 90],
    'age': [25, 30, 35, 40],
})

# Default: a sorted frame is returned, df keeps its original row order
df_sorted = df.sort_values('score')
print("\nsort_values without inplace (new DataFrame):")
print(df_sorted)
print("Original unchanged:")
print(df)

# inplace=True: the rows of df itself are reordered
df.sort_values('score', inplace=True)
print("\nsort_values with inplace (original modified):")
print(df)

# ---- drop: remove the given labels from rows or columns ----
df = pd.DataFrame({
    'id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
})

# Default: a frame without 'age' is returned, df still has the column
df_dropped = df.drop(['age'], axis=1)
print("\ndrop without inplace (new DataFrame):")
print(df_dropped)
print("Original unchanged:")
print(df)

# inplace=True: 'age' is removed from df itself
df.drop(['age'], axis=1, inplace=True)
print("\ndrop with inplace (original modified):")
print(df)


Original DataFrame:
   id     name  math  age
0   1    Alice  90.0   25
1   2      Bob   NaN   30
2   3  Charlie  85.0   35
3   4    David   NaN   40

After fillna without inplace (new DataFrame):
   id     name  math  age
0   1    Alice  90.0   25
1   2      Bob   0.0   30
2   3  Charlie  85.0   35
3   4    David   0.0   40
Original unchanged:
   id     name  math  age
0   1    Alice  90.0   25
1   2      Bob   NaN   30
2   3  Charlie  85.0   35
3   4    David   NaN   40

After fillna with inplace (original modified):
   id     name  math  age
0   1    Alice  90.0   25
1   2      Bob   0.0   30
2   3  Charlie  85.0   35
3   4    David   0.0   40

After set_index without inplace (new DataFrame):
       name  math  age
id                    
1     Alice  90.0   25
2       Bob   NaN   30
3   Charlie  85.0   35
4     David   NaN   40
Original unchanged:
   id     name  math  age
0   1    Alice  90.0   25
1   2      Bob   NaN   30
2   3  Charlie  85.0   35
3   4    David   NaN   40

After 

<B>Summary:</B>

<B>fillna()</B> replaces missing values. inplace=True alters original data, False returns new DataFrame.

<B>set_index()</B> sets a column as index. inplace=True modifies original, False returns new.

<B>rename()</B> renames columns or indexes. inplace=True modifies original, False returns new.

<B>sort_values()</B> sorts rows by column values. inplace=True changes original, False returns new.

<B>drop()</B> removes specified rows or columns. inplace=True directly removes from original, False returns new DataFrame.

Calling a method with inplace=True modifies the DataFrame directly and returns None, while the default (inplace=False) returns a new DataFrame and leaves the original unchanged. In most code, assigning the returned DataFrame to a variable (for example, `df = df.drop(columns=['age'])`) is clearer and more flexible, because the result can be chained into further method calls.

---

Example: Default RangeIndex

---

In [11]:
import pandas as pd

# No index argument is supplied when the frame is built
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6]))

# pandas therefore assigns a RangeIndex automatically: row labels 0, 1, 2
print(df)


   A  B
0  1  4
1  2  5
2  3  6


---

Creating a RangeIndex Explicitly:

---

In [None]:
import pandas as pd

# Sample data: one list of values per column
data = {'Name': ['Amit', 'Ankit', 'Ayush'],
        'Age': [25, 28, 32],
        'City': ['Indore', 'Ghazipur', 'Pune']}

# Build the frame once, passing an explicit RangeIndex as its row labels.
# (An earlier pd.DataFrame(data) call was removed: its result was overwritten
# immediately and never used.)
# RangeIndex(5, 8) yields 5, 6, 7 -- the stop value is exclusive -- giving one
# label per row; name='Index' is the header printed above the labels.
df = pd.DataFrame(data, index=pd.RangeIndex(5, 8, name='Index'))
print(df)


        Name  Age      City
Index                      
5       Amit   25    Indore
6      Ankit   28  Ghazipur
7      Ayush   32      Pune


---

Example: RangeIndex with a custom start, stop, and step

---

In [13]:
import pandas as pd

# RangeIndex(10, 20, 2) -> labels 10, 12, 14, 16, 18 (the stop value 20 is excluded)
index = pd.RangeIndex(10, 20, 2)
df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=index)
print(df)


    A
10  1
12  2
14  3
16  4
18  5


---

Renaming Index Labels (Row Labels)

---

In [None]:
import pandas as pd

#create dataframe
data = {'Name':['Amit','Ankit','Ayush'],
        'Age': [25,28,32],
        'City': ['Indore','Ghazipur','Pune']}

df = pd.DataFrame(data)
print('Original DataFrame : ',df)

#rename index
df.rename(index={0:'A',1:'B',2:'C'},inplace= True)

#display dataframe after index is renamed