# Understand concepts here

First, I will work through the core concepts using a small dataset.

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Four-row toy roster used by the following cells to practise renaming
# columns and transforming values.
df = pd.DataFrame(
    {
        'firstName': ['Jhon', 'Jack', 'Maria', 'Peter'],
        'lastName': ['Wick', 'Sparrow', 'Sanchez', 'Parker'],
        'gender': ['male', 'male', 'female', 'male'],
    }
)

df.head()

Unnamed: 0,firstName,lastName,gender
0,Jhon,Wick,male
1,Jack,Sparrow,male
2,Maria,Sanchez,female
3,Peter,Parker,male


In [4]:
# Replace all column labels at once; the new labels are applied by position,
# so the list must have one entry per existing column, in order.
df.columns = pd.Index(['first_name', 'last_name', 'gender'])
df.head()

Unnamed: 0,first_name,last_name,gender
0,Jhon,Wick,male
1,Jack,Sparrow,male
2,Maria,Sanchez,female
3,Peter,Parker,male


In [5]:
# Upper-case every column label by mapping str.upper over the current labels.
df.columns = list(map(str.upper, df.columns))
df.head()

Unnamed: 0,FIRST_NAME,LAST_NAME,GENDER
0,Jhon,Wick,male
1,Jack,Sparrow,male
2,Maria,Sanchez,female
3,Peter,Parker,male


In [6]:
# Upper-case the values of one column with the vectorised string accessor.
# Unlike a Python-level loop calling x.upper() on each element, .str.upper()
# leaves missing values as missing instead of raising on them.
df['FIRST_NAME'] = df['FIRST_NAME'].str.upper()
df.head()

Unnamed: 0,FIRST_NAME,LAST_NAME,GENDER
0,JHON,Wick,male
1,JACK,Sparrow,male
2,MARIA,Sanchez,female
3,PETER,Parker,male


In [7]:
# Display the whole frame (it has only four rows) rather than just head().
df

Unnamed: 0,FIRST_NAME,LAST_NAME,GENDER
0,JHON,Wick,male
1,JACK,Sparrow,male
2,MARIA,Sanchez,female
3,PETER,Parker,male


In [8]:
# Rename a single column through a {old: new} mapping applied to the column
# axis; labels not present in the mapping are left untouched.
df.rename({'FIRST_NAME': 'first_name'}, axis='columns', inplace=True)
df.head()

Unnamed: 0,first_name,LAST_NAME,GENDER
0,JHON,Wick,male
1,JACK,Sparrow,male
2,MARIA,Sanchez,female
3,PETER,Parker,male


In [9]:
import pandas as pd

# Pattern -> substitution pairs applied to the text column below.
replacement = {"name": "NAME", "age": "Age", "school": "SchOOl"}

# One-column sample frame holding short free-text phrases.
df = pd.Series(
    ["I am a name", "I am old", "I go to school"],
    name="MainBranch",
).to_frame()

# With regex=True each dictionary key is matched as a pattern inside the
# text, so only the matching part of a phrase is substituted.
df = df.assign(MainBranch=df["MainBranch"].replace(replacement, regex=True))

# Show the frame after the substitutions.
print(df)

       MainBranch
0     I am a NAME
1        I am old
2  I go to SchOOl


In [10]:
# Sample column containing keywords with and without a translation.
data = pd.DataFrame(
    {"records": ["name", "age", "age", "name", "location", "address"]}
)

# Translation table; it has no entry for "address".
replacements = {"name": "Naam", "age": "Umar", "location": "jagah"}


def replace_values(value):
    """Return the translation of ``value``, or ``None`` if it has no entry."""
    return replacements.get(value)


# Translate each cell individually; untranslated keywords become missing.
data["records"] = data["records"].apply(replace_values)
print(data)

  records
0    Naam
1    Umar
2    Umar
3    Naam
4   jagah
5    None


In [11]:
# Second copy of the toy roster, used for the replace / combine / split demos.
td = pd.DataFrame(
    [
        ('Jhon', 'Wick', 'male'),
        ('Jack', 'Sparrow', 'male'),
        ('Maria', 'Sanchez', 'female'),
        ('Peter', 'Parker', 'male'),
    ],
    columns=['firstName', 'lastName', 'gender'],
)
td.head()

Unnamed: 0,firstName,lastName,gender
0,Jhon,Wick,male
1,Jack,Sparrow,male
2,Maria,Sanchez,female
3,Peter,Parker,male


In [12]:
# Swap one exact cell value for another; cells that are not exactly 'Jhon'
# are left as they are.
td['firstName'] = td['firstName'].replace(to_replace='Jhon', value='A')
td.head()

Unnamed: 0,firstName,lastName,gender
0,A,Wick,male
1,Jack,Sparrow,male
2,Maria,Sanchez,female
3,Peter,Parker,male


In [13]:
# Build a combined column by joining first and last name with a single space.
td['full_name'] = td['firstName'].str.cat(td['lastName'], sep=' ')
td.head()

Unnamed: 0,firstName,lastName,gender,full_name
0,A,Wick,male,A Wick
1,Jack,Sparrow,male,Jack Sparrow
2,Maria,Sanchez,female,Maria Sanchez
3,Peter,Parker,male,Peter Parker


In [14]:
# Remove the two source columns in place now that 'full_name' holds both parts.
td.drop(['firstName', 'lastName'], axis='columns', inplace=True)
td

Unnamed: 0,gender,full_name
0,male,A Wick
1,male,Jack Sparrow
2,female,Maria Sanchez
3,male,Peter Parker


In [15]:
# Split 'full_name' back into two columns. n=1 limits the split to the first
# space so the result never has more than two columns; without it, a name
# containing more than one space would produce extra columns and the
# two-column assignment would raise a ValueError.
td[['first_name', 'last_name']] = td['full_name'].str.split(' ', n=1, expand=True)
td

Unnamed: 0,gender,full_name,first_name,last_name
0,male,A Wick,A,Wick
1,male,Jack Sparrow,Jack,Sparrow
2,female,Maria Sanchez,Maria,Sanchez
3,male,Peter Parker,Peter,Parker


# Trying to understand concat

In [16]:
# Student master data. The record for id=2 has no 'city' key at all and the
# record for id=3 sets it to None; ids 4 and 5 use empty strings instead.
studentData = pd.DataFrame([
    {'id': 1, 'name': 'Amitav', 'state': 'WB', 'city': 'Kolkata'},
    {'id': 2, 'name': 'Arnav', 'state': 'MH', },
    {'id': 3, 'name': 'Puneet', 'state': 'MH', 'city': None},
    {'id': 4, 'name': 'Amar', 'state': 'MH', 'city': ''},
    {'id': 5, 'name': '', 'state': '', 'city': ''},
])

# Exam scores, present for ids 1 and 2 only.
result = pd.DataFrame([
    {'id': 1, 'Maths': 80, 'English': 60},
    {'id': 2, 'Maths': 90, 'English': 80},
])

# axis='columns' places the two frames side by side, matching rows by index
# label (not by the 'id' values). `result` only has index 0 and 1, so its
# columns are NaN for the remaining rows, and both 'id' columns are kept.
final = pd.concat([studentData, result], axis='columns')
final.head(10)

Unnamed: 0,id,name,state,city,id.1,Maths,English
0,1,Amitav,WB,Kolkata,1.0,80.0,60.0
1,2,Arnav,MH,,2.0,90.0,80.0
2,3,Puneet,MH,,,,
3,4,Amar,MH,,,,
4,5,,,,,,


In [17]:
# Re-display the source frame: it still has only its own four columns.
studentData

Unnamed: 0,id,name,state,city
0,1,Amitav,WB,Kolkata
1,2,Arnav,MH,
2,3,Puneet,MH,
3,4,Amar,MH,
4,5,,,


In [18]:
# dropna() removes only rows that contain a real missing value (NaN/None);
# empty strings ('') are not treated as missing, so those rows are kept.
studentData.dropna()

Unnamed: 0,id,name,state,city
0,1,Amitav,WB,Kolkata
3,4,Amar,MH,
4,5,,,


In [19]:
# Student data with an 'age' column that mixes numbers and empty strings.
studentData = pd.DataFrame([
    {'id': 1, 'name': 'Amitav', 'state': 'WB', 'city': 'Kolkata', 'age': 1},
    {'id': 2, 'name': 'Arnav', 'state': 'MH', 'age': 2},
    {'id': 3, 'name': 'Puneet', 'state': 'MH', 'city': None, 'age': ''},
    {'id': 4, 'name': 'Amar', 'state': 'MH', 'city': '', 'age': ''},
    {'id': 5, 'name': '', 'state': '', 'city': '', 'age': 44},
    {'id': 6, 'name': 'Ravi', 'city': 'Guj', 'age': 24},
])

# Turn empty strings into NaN so pandas recognises them as missing.
studentData = studentData.replace('', np.nan)

# Make 'age' explicitly numeric instead of relying on replace() to downcast
# the object column, which newer pandas versions no longer do silently.
studentData['age'] = pd.to_numeric(studentData['age'])

# Fill missing ages with the median of the known ages. The result is assigned
# back to the column: calling an inplace method on studentData['age'] is
# chained assignment, which is deprecated and does not modify the frame when
# Copy-on-Write is enabled.
studentData['age'] = studentData['age'].fillna(studentData['age'].median())
studentData

Unnamed: 0,id,name,state,city,age
0,1,Amitav,WB,Kolkata,1.0
1,2,Arnav,MH,,2.0
2,3,Puneet,MH,,13.0
3,4,Amar,MH,,13.0
4,5,,,,44.0
5,6,Ravi,,Guj,24.0


In [20]:
# dropna(axis='columns') would remove every column that contains a missing
# value, which usually throws away too much:
# studentData.dropna(axis='columns')
# Instead, drop rows, and only those whose 'city' value is missing.
studentData.dropna(axis='index', how='any', subset=['city'])

Unnamed: 0,id,name,state,city,age
0,1,Amitav,WB,Kolkata,1.0
5,6,Ravi,,Guj,24.0


In [21]:
# Boolean frame of the same shape: True wherever a value is missing.
studentData.isna()

Unnamed: 0,id,name,state,city,age
0,False,False,False,False,False
1,False,False,False,True,False
2,False,False,False,True,False
3,False,False,False,True,False
4,False,True,True,True,False
5,False,False,True,False,False


In [22]:
# studentData.fillna(0)

In [23]:
# Per-column dtypes; 'age' is float64 after the missing ages were filled.
studentData.dtypes

id         int64
name      object
state     object
city      object
age      float64
dtype: object

In [24]:
# Distinct 'city' values; NaN and None show up as two separate missing markers.
studentData['city'].unique()

array(['Kolkata', nan, None, 'Guj'], dtype=object)

In [25]:
# (number of rows, number of columns)
studentData.shape

(6, 5)

In [26]:
# Rebuild the student frame without the 'age' column for the cells below.
# The record for id=2 has no 'city' key, id=3 has city=None, and ids 4-5 use ''.
studentData = pd.DataFrame([
    {'id': 1, 'name': 'Amitav', 'state': 'WB', 'city': 'Kolkata'},
    {'id': 2, 'name': 'Arnav', 'state': 'MH', },
    {'id': 3, 'name': 'Puneet', 'state': 'MH', 'city': None},
    {'id': 4, 'name': 'Amar', 'state': 'MH', 'city': ''},
    {'id': 5, 'name': '', 'state': '', 'city': ''},
])

In [49]:
# A scalar assigned to a column is broadcast to every row, so a per-row
# itertuples() loop that re-assigns the whole column on each iteration is
# unnecessary; one assignment produces the same column.
studentData['name122'] = 'Amitav'

print(studentData)

   id    name state     city   name1 name122
0   1  Amitav    WB  Kolkata  Amitav  Amitav
1   2  Amitav    MH      NaN  Amitav  Amitav
2   3  Amitav    MH     None  Amitav  Amitav
3   4  Amitav    MH           Amitav  Amitav
4   5  Amitav                 Amitav  Amitav


In [47]:
def addName(name='Amitav'):
    """Return the sentence ``'name is <name>'``.

    Parameters
    ----------
    name : str, optional
        Name to embed in the sentence. Defaults to ``'Amitav'``, so calling
        ``addName()`` with no arguments returns the same text as before.

    Returns
    -------
    str
        The formatted sentence.
    """
    return f'name is {name}'



'na'