In [None]:


## **Introduction to Pandas**

## **Understanding Series and DataFrame**

Pandas is a powerful open-source data analysis and manipulation library 
for Python. It is built on top of NumPy and provides high-level data 
structures and tools designed for practical and efficient data analysis.
Pandas is widely used in data science, machine learning, and 
data visualization projects due to its ease of use and versatility.


### **1. Series**
- A Series is a one-dimensional labeled array.
- Syntax: 
  pandas.Series(data, index=index)

- Key Characteristics:
  - Homogeneous data type.
  - Indexed like a dictionary for quick access.

### **2. DataFrame**
- A DataFrame is a two-dimensional labeled data structure with rows and columns.
- Syntax:
  
  pandas.DataFrame(data, index=index, columns=columns)
  
- Key Characteristics:
  - Columns can have different data types.
  - Offers functionalities for data manipulation, analysis, and visualization.



In [2]:
import pandas as pd

In [3]:
import pandas as pd
data = [1, 2, 3, 4, 5]
series = pd.Series(data, index=['a', 'b', 'c', 'd', 'e'])
print(series)


a    1
b    2
c    3
d    4
e    5
dtype: int64


In [None]:
import pandas as pd
data = [1, 2, 3, 4, 5]
series = pd.Series(data, index=['a', 'b', 'c', 'd', 'e'])
print(series)


In [None]:
data = [1, 2.4, 3.3,"abc", 5]
series = pd.Series(data, index=['a', 'b', 'c', 'd', 'e'])
print(series)


In [None]:
data = {
      'Name': ['Alice', 'Bob', 'Charlie'],
      'Age': [25, 30, 35],
      'City': ['New York', 'Los Angeles', 'Chicago']
  }
df = pd.DataFrame(data)
print(df)

## **Creating DataFrames**

### **1. From Dictionaries**

In [None]:
import pandas as pd

In [5]:
data = {
      'Name': ['John', 'Anna', 'Peter'],
      'Age': [28, 24, 35],
      'Profession': ['Engineer', 'Doctor', 'Lawyer']
  }
print(data)
df = pd.DataFrame(data)
print(df)


{'Name': ['John', 'Anna', 'Peter'], 'Age': [28, 24, 35], 'Profession': ['Engineer', 'Doctor', 'Lawyer']}
    Name  Age Profession
0   John   28   Engineer
1   Anna   24     Doctor
2  Peter   35     Lawyer


### **2. From Arrays**

In [None]:
import numpy as np
data = np.array([[1, 2, 3], [4, 5, 6]])
df = pd.DataFrame(data, columns=['A', 'B', 'C'])
print(df)  

### Indexing in series:

In [7]:
data = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])

# Accessing by label
print(data['b'])  

# Accessing by position
print(data[2])  


20
30


  print(data[2])


### Indexing in DataFrame

In [11]:
# Creating a DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [12]:
# Accessing a column by label
print(df['Name']) 

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object


In [13]:
# Accessing multiple columns
print(df[['Name', 'Age']])  # Outputs "Name" and "Age" columns


      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [14]:
# Accessing a row using .loc (label-based)
print(df.loc[1])  # Row with index 1 (Bob)


Name            Bob
Age              30
City    Los Angeles
Name: 1, dtype: object


In [None]:
# Accessing a row using .iloc (position-based)
print(df.iloc[2])  # Third row (Charlie)

#### slicing:


In [15]:
import pandas as pd

# Creating a Series
data = pd.Series([10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'])

# Slicing by position
print(data[1:4])  # Outputs 20,30,40

# Slicing by index label
print(data['b':'d'])  # Outputs 20,30,40

# Reverse slicing
print(data[::-1])  # Outputs elements in reverse order

# Slicing with step
print(data[::2])  # Every second element


b    20
c    30
d    40
dtype: int64
b    20
c    30
d    40
dtype: int64
e    50
d    40
c    30
b    20
a    10
dtype: int64
a    10
c    30
e    50
dtype: int64


### **3. From CSV Files** 

In [None]:
import pandas as pd

In [19]:
df = pd.read_csv("/kaggle/input/home-loan-approval-train-dataset/loan_sanction_train.csv")
df.head(10)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
6,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
7,LP001014,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N
8,LP001018,Male,Yes,2,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y
9,LP001020,Male,Yes,1,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N


In [20]:
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [None]:
df.head(10)

In [21]:
df.tail()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
609,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,LP002979,Male,Yes,3+,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,LP002983,Male,Yes,1,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,LP002984,Male,Yes,2,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y
613,LP002990,Female,No,0,Graduate,Yes,4583,0.0,133.0,360.0,0.0,Semiurban,N


In [None]:
df.tail(10)

In [22]:
df.shape

(614, 13)

In [23]:
df.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [25]:
df.rename(columns={'CoapplicantIncome': 'CA'}, inplace=True)
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CA,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [26]:
df.drop(columns=['CA'], inplace=True)
#df.drop(index=[0, 1], inplace=True)  # Drop rows 0 and 1
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,141.0,360.0,1.0,Urban,Y


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Loan_ID           614 non-null    object 
 1   Gender            601 non-null    object 
 2   Married           611 non-null    object 
 3   Dependents        599 non-null    object 
 4   Education         614 non-null    object 
 5   Self_Employed     582 non-null    object 
 6   ApplicantIncome   614 non-null    int64  
 7   LoanAmount        592 non-null    float64
 8   Loan_Amount_Term  600 non-null    float64
 9   Credit_History    564 non-null    float64
 10  Property_Area     614 non-null    object 
 11  Loan_Status       614 non-null    object 
dtypes: float64(3), int64(1), object(8)
memory usage: 57.7+ KB


In [4]:
import numpy as np
import pandas as pd

In [5]:
df1 = pd.read_excel("/kaggle/input/model12/tensorflow2/default/1/Batch_report (13).xlsx")
df1

Unnamed: 0,Candidate Full Name,Email ID,Contact No,Admission Date,Course Name,Domain,Skill Possessed,10th Percentage,10th YOP,12th Percentage,...,Graduation YOP,Post Graduation,Post Graduation Percentage,Post Graduation YOP,Current City,Candidate Status,Final MOCK,Communication Remark,Technical Remark,Resume
0,Vikas Ghadge,ghadgevikas8@gmail.com,9673714108,20-09-2024,ITPD,Data Analytics,DataAnalytics,78.18,2011.0,53.18,...,2017.0,,,,pune,Completed 80% Training & Final MOCK Pending,PENDING,Good,Average,View Resume
1,Kumail Mehter,kumail.mehter22@gmail.com,7875940600,20-09-2024,ITPD,Data Analytics,DataAnalytics,60.46,2009.0,46.67,...,2014.0,,,,pune,Completed 80% Training & Final MOCK Pending,PENDING,Good,Average,View Resume
2,Avantika Jadhav,avantikajadhav492003@gmail.com,9762074834,20-09-2024,,,,,,,...,,,,,,In Training,PENDING,Poor,Poor,
3,Chaitanya Sawant,sawantchaitanya592@gmail.com,7517918868,20-09-2024,,,,,,,...,,,,,,Training Not Started,PENDING,Poor,Poor,
4,Nupur Pandya,nupurpandya8@gmail.com,8793348958,20-09-2024,UPGRADATION,Decided,BusinessAnalysis,56.0,2007.0,58.0,...,2014.0,OTHER,Between 50% and 60% OR CGPA between 5 and 6,2014.0,pune,Training Not Started,PENDING,Poor,Poor,View Resume
5,Kanchan Malode,kanchanmalode6@gmail.com,7756992407,20-07-2024,ITPD,Python Web Development,Sql,80.6,2014.0,59.08,...,2019.0,,,,nasik,Completed 80% Training & Final MOCK Completed,PENDING,Average,Average,
6,Divya Borse,divyaborse252@gmail.com,9119554031,20-09-2024,ITPD,Data Analytics,"C,C++,Java,UI",63.6,2019.0,73.0,...,2024.0,,,,jalgaon,In Training,PENDING,Poor,Poor,View Resume
7,Isha Nikam,ishanikam1213@gmail.com,7378343080,09-09-2024,ITPD,Python Web Development,Python,78.0,2018.0,57.0,...,2024.0,,,,nasik,Placed,PENDING,Good,Good,View Resume
8,Yadnyesh Chaudhari,yadnyeshchaudhari875@gmail.com,8766937956,04-09-2024,ITPD,Python Web Development,Sql,62.8,2018.0,66.0,...,2024.0,,,,jalgaon,Completed 80% Training & Final MOCK Pending,PENDING,Average,Good,View Resume
9,Priya Kamble,priyakamble1308@gmail.com,9028416444,10-09-2024,ITPD,Business Analyst + Data Analytics,Sql,60.0,2009.0,54.0,...,2014.0,OTHER,Above 60% OR CGPA above 6,2017.0,pune,Completed 80% Training & Final MOCK Pending,PENDING,Poor,Poor,
