# Capstone Project – Python Fundamentals

**Name:** YestaRani  
**Description:** This project solves all tasks given in the Python Fundamentals capstone using Pandas and NumPy.



In [None]:
# Import pandas for data manipulation
import pandas as pd

# Import numpy for numerical operations
import numpy as np


## Task 1  
Create three DataFrames (Employee, Seniority Level, Project) and save them as CSV files.


In [None]:
# Employee master data, declared one column at a time and assembled once.
employee_ids = ["A001", "A002", "A003", "A004", "A005"]
employee_names = ["John Alter", "Alice Luxumberg", "Tom Sabestine", "Nina Adgra", "Amy Johny"]
employee_genders = ["M", "F", "M", "F", "F"]
employee_cities = ["Paris", "London", "Berlin", "Newyork", "Madrid"]
employee_ages = [25, 27, 29, 31, 30]

employee_df = pd.DataFrame(
    {
        "ID": employee_ids,
        "Name": employee_names,
        "Gender": employee_genders,
        "City": employee_cities,
        "Age": employee_ages,
    }
)

# Persist the table without the positional index column.
employee_df.to_csv("Employee.csv", index=False)

# Last expression of the cell: renders the Employee table.
employee_df


In [None]:
# Seniority table: one designation level per employee ID.
seniority_ids = ["A001", "A002", "A003", "A004", "A005"]
designation_levels = [2, 2, 3, 2, 3]

seniority_df = pd.DataFrame(
    {"ID": seniority_ids, "Designation Level": designation_levels}
)

# Persist the table without the positional index column.
seniority_df.to_csv("Seniority.csv", index=False)

# Last expression of the cell: renders the Seniority table.
seniority_df



In [None]:
# Project table: one row per project; an employee ID may appear several times.
project_owner_ids = [
    "A001", "A002", "A003", "A004", "A005", "A002", "A005",
    "A003", "A001", "A003", "A001", "A004", "A004", "A005",
]
project_names = [
    "Project 1", "Project 2", "Project 3", "Project 4", "Project 5",
    "Project 6", "Project 7", "Project 8", "Project 9", "Project 10",
    "Project 11", "Project 12", "Project 13", "Project 14",
]
# Two costs (Project 5 and Project 9) are deliberately missing.
project_costs = [
    1002000, 2000000, 4500000, 5500000, np.nan, 680000, 400000,
    350000, np.nan, 300000, 2000000, 1000000, 3000000, 200000,
]
project_statuses = [
    "Finished", "Ongoing", "Finished", "Ongoing", "Finished", "Failed",
    "Finished", "Failed", "Ongoing", "Finished", "Failed",
    "Ongoing", "Finished", "Finished",
]

project_df = pd.DataFrame(
    {
        "ID": project_owner_ids,
        "Project": project_names,
        "Cost": project_costs,
        "Status": project_statuses,
    }
)

# Persist the table without the positional index column.
project_df.to_csv("Project.csv", index=False)

# Last expression of the cell: renders the Project table.
project_df


## Task 2  
Replace the missing values in the `Cost` column of the Project DataFrame with the running average of the known costs that precede each gap, computed with a `for` loop.



In [None]:
# Work on a copy of the Cost column so the DataFrame column is replaced in one
# assignment at the end instead of being written element by element.
costs = project_df["Cost"].copy()

# Running total and count of the known (non-missing) costs seen so far.
# Imputed values are not added to these, so each gap is filled with the
# average of the real costs that precede it.
running_sum = 0.0
count = 0

for i in range(len(costs)):
    cost = costs.iloc[i]
    if pd.isna(cost):
        # A gap before any known cost has no running average yet; leave it
        # as NaN rather than dividing by zero.
        if count > 0:
            costs.iloc[i] = running_sum / count
    else:
        running_sum += cost
        count += 1

# Write the imputed column back to the DataFrame.
project_df["Cost"] = costs

# Last expression of the cell: renders the updated Project table.
project_df


## Task 3  
Split the Name column in the Employee DataFrame into First Name and Last Name and remove the original Name column.


In [None]:
# Split "Name" into "First Name" / "Last Name" and drop the original column.
# The guard makes the cell safe to re-run: once "Name" has been removed there
# is nothing left to split.
if "Name" in employee_df.columns:
    # n=1 splits on the first space only, so a name with more than two words
    # still yields exactly two parts (everything after the first space becomes
    # the last name). reindex guarantees both columns exist even when no name
    # contains a space; the missing part is then NaN.
    name_parts = (
        employee_df["Name"]
        .str.split(" ", n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    employee_df["First Name"] = name_parts[0]
    employee_df["Last Name"] = name_parts[1]

    # Non-in-place drop: rebinds employee_df to a new frame without "Name".
    employee_df = employee_df.drop(columns="Name")

# Last expression of the cell: renders the updated Employee table.
employee_df


## Task 4  
Join Employee, Seniority Level, and Project DataFrames into a single DataFrame named Final.


In [None]:
# Build Final by left-joining on "ID": employees first, then their seniority
# level, then their projects. Because an employee can own several projects,
# the result has one row per matching project.
Final = (
    employee_df
    .merge(seniority_df, how="left", on="ID")
    .merge(project_df, how="left", on="ID")
)

# Last expression of the cell: renders the combined table.
Final


## Task 5  
Add a Bonus column to the Final DataFrame. Employees who finished projects receive a 5% bonus based on project cost.


In [None]:
# Bonus is 5% of the project cost for rows whose status is "Finished",
# and 0 for every other row.
is_finished = Final["Status"].eq("Finished")
five_percent_of_cost = Final["Cost"].mul(0.05)

Final["Bonus"] = np.where(is_finished, five_percent_of_cost, 0)

# Last expression of the cell: renders the table with the new column.
Final


## Task 6  
Demote the designation level by 1 for employees whose projects have failed (level 1 is the highest rank, so a demotion adds 1 to the level).  
Delete the employee records whose designation level becomes greater than 4.



In [None]:
# Demotion: add 1 to the designation level on rows whose project failed
# (a larger number is a lower rank). Final has one row per project, so only
# the failed project's row is changed.
# NOTE: this step is not idempotent; re-running the cell demotes again.
failed_mask = Final["Status"] == "Failed"
Final.loc[failed_mask, "Designation Level"] += 1

# Keep only rows whose level is at most 4. The explicit .copy() makes Final an
# independent DataFrame instead of a filtered slice, so later cells can add or
# modify columns without pandas raising SettingWithCopyWarning.
Final = Final[Final["Designation Level"] <= 4].copy()

# Last expression of the cell: renders the filtered table.
Final


## Task 7  
Add “Mr.” and “Mrs.” as prefixes to the First Name based on gender and remove the Gender column.


In [None]:
# Prefix "First Name" with a title derived from "Gender", then remove "Gender".
# The guard makes the cell safe to re-run: once "Gender" has been dropped the
# titles are already applied and must not be added a second time.
if "Gender" in Final.columns:
    # "M" gets "Mr. "; every other value (including a missing one) gets "Mrs. ".
    titled_first_name = np.where(
        Final["Gender"] == "M",
        "Mr. " + Final["First Name"],
        "Mrs. " + Final["First Name"],
    )

    # Non-in-place drop returns a new DataFrame, so the column assignment
    # below never writes into a filtered slice of an earlier frame.
    Final = Final.drop(columns="Gender")
    Final["First Name"] = titled_first_name

# Last expression of the cell: renders the updated table.
Final


## Task 8
Promote the designation level by 1 for employees whose age is more than 29 years (level 1 is the highest rank, so a promotion subtracts 1 from the level).

In [None]:
# Promotion: rows with Age > 29 move up one rank. Level 1 is the highest rank,
# so moving up means subtracting 1 from the designation level.
# NOTE(review): there is no floor here, so a row already at level 1 would
# become 0 - confirm whether that can occur with the real data.
older_than_29 = Final["Age"].gt(29)
Final.loc[older_than_29, "Designation Level"] = (
    Final.loc[older_than_29, "Designation Level"] - 1
)

# Last expression of the cell: renders the updated table.
Final


## Task 9  
Create a DataFrame showing total project cost per employee.


In [None]:
# Sum the project costs for each employee (identified by ID and First Name)
# and expose the total under a descriptive column name.
TotalProjCost = (
    Final
    .groupby(["ID", "First Name"], as_index=False)["Cost"]
    .sum()
    .rename(columns={"Cost": "Total Project Cost"})
)

# Last expression of the cell: renders the per-employee totals.
TotalProjCost


## Task 10  
Print all employee details whose city name contains the letter "o".


In [None]:
# Rows whose city name contains the letter "o" (case-insensitive).
# regex=False treats "o" as a literal substring; na=False turns a missing
# city into "no match" so the boolean mask never contains NaN, which would
# make the row selection raise.
city_has_o = Final["City"].str.contains("o", case=False, na=False, regex=False)

# Last expression of the cell: renders the matching rows.
Final[city_has_o]


## Conclusion

This capstone project demonstrates the use of Python fundamentals, Pandas, and
NumPy to solve real-world data problems. All tasks, from data creation and
cleaning to transformation and analysis, were completed as per the project requirements.
