# Python Coding Assessment 
by Esaq A

### Executing Data Cleaning and Pandas Joins in Python

Task 1: Loading the data

In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("/Users/iamesaq/Documents/PythonPrac/Titanic-Dataset.csv")
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


Task 2: Printing Columns Names of the Dataframe

In [2]:
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

Task 3: Printing Datatype of each Column in Dataframe

In [3]:
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

Task 4: Data Cleaning

In [4]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [6]:
if df['Age'].isnull().any():
    df['Age'].fillna(df['Age'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)


In [7]:
for col in ['Embarked', 'Cabin']:
    if df[col].isnull().any():
        df[col].fillna(df[col].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)


In [9]:
print("\nMissing values after cleaning:")
print(df.isnull().sum())


Missing values after cleaning:
PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       0
dtype: int64


In [10]:
print("\nCleaned DataFrame Head:")
print(df.head())


Cleaned DataFrame Head:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare    Cabin Embarked  
0      0         A/5 21171   7.2500  B96 B98        S  
1      0          PC 17599  71.2833      C85        C  
2      0  STON/O2. 3101282   7.9250  B96 B98        S  
3      0            113803  53.1000     C123        S  
4      0       

Task 5: Creating a Second Dataframe

In [11]:
passenger_fare_details = pd.DataFrame()
passenger_fare_details['PassengerId'] = df['PassengerId']
passenger_fare_details['Fare'] = df['Fare']

In [12]:
print("Second DataFrame (passenger_fare_details) Head:")
print(passenger_fare_details.head())

Second DataFrame (passenger_fare_details) Head:
   PassengerId     Fare
0            1   7.2500
1            2  71.2833
2            3   7.9250
3            4  53.1000
4            5   8.0500


Task 6: Performing Pandas Join

In [13]:
merged_df = pd.merge(df, passenger_fare_details, on='PassengerId', how='left')

print("Merged DataFrame Head:")
print(merged_df.head())

Merged DataFrame Head:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket   Fare_x    Cabin Embarked   Fare_y  
0      0         A/5 21171   7.2500  B96 B98        S   7.2500  
1      0          PC 17599  71.2833      C85        C  71.2833  
2      0  STON/O2. 3101282   7.9250  B96 B98        S   7.9250  
3      0            113803  53.1000  

Task 7: Sorting the Dataframe Values

In [14]:
sorted_by_age = merged_df.sort_values(by='Age')
print(sorted_by_age.head())

     PassengerId  Survived  Pclass                             Name     Sex  \
803          804         1       3  Thomas, Master. Assad Alexander    male   
755          756         1       2        Hamalainen, Master. Viljo    male   
469          470         1       3    Baclini, Miss. Helene Barbara  female   
644          645         1       3           Baclini, Miss. Eugenie  female   
78            79         1       2    Caldwell, Master. Alden Gates    male   

      Age  SibSp  Parch  Ticket   Fare_x    Cabin Embarked   Fare_y  
803  0.42      0      1    2625   8.5167  B96 B98        C   8.5167  
755  0.67      1      1  250649  14.5000  B96 B98        S  14.5000  
469  0.75      2      1    2666  19.2583  B96 B98        C  19.2583  
644  0.75      2      1    2666  19.2583  B96 B98        C  19.2583  
78   0.83      0      2  248738  29.0000  B96 B98        S  29.0000  


Task 8: Applying Functions

In [17]:
def status_upper(status) :
    return status.upper()
df['Name'] = df['Name'].apply(status_upper)
print(df['Name'].head())

0                              BRAUND, MR. OWEN HARRIS
1    CUMINGS, MRS. JOHN BRADLEY (FLORENCE BRIGGS TH...
2                               HEIKKINEN, MISS. LAINA
3         FUTRELLE, MRS. JACQUES HEATH (LILY MAY PEEL)
4                             ALLEN, MR. WILLIAM HENRY
Name: Name, dtype: object
