# Pandas - Data Frame And Series

Pandas is a powerful data manipulation library in Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional array-like object, while a DataFrame is a two-dimensional, size-mutable and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [None]:
import pandas as pd

# Series
# A pandas Series is a one-dimensional, array-like object that can hold any data type.
# It is similar to a single column in a table.

data = list(range(1, 6))  # the values 1..5
series = pd.Series(data)  # no index given, so the default 0..4 is used
print("Series \n", series)

Series 
 0    1
1    2
2    3
3    4
4    5
dtype: int64


In [4]:
# Build a Series from a dictionary: the keys become the index labels
data = dict(a=1, b=2, c=3)
series_dict = pd.Series(data)
print(series_dict)


a    1
b    2
c    3
dtype: int64


In [5]:
# Create a Series with explicit index labels instead of the default 0..n-1
data = [10, 20, 30]
index = list('abc')
pd.Series(data=data, index=index)


a    10
b    20
c    30
dtype: int64

In [None]:
# DataFrame
# Build a DataFrame from a dictionary of lists: each key becomes a column
# and each list supplies that column's values, one per row.

data = dict(
    Name=['Bhavesh', 'Shivam', 'Zeeshan'],
    Age=[26] * 3,
    City=['Bengaluru'] * 3,
)

df = pd.DataFrame(data)
print(df, type(df), sep="\n")

      Name  Age       City
0  Bhavesh   26  Bengaluru
1   Shivam   26  Bengaluru
2  Zeeshan   26  Bengaluru
<class 'pandas.DataFrame'>


In [9]:
import numpy as np

# Convert the DataFrame to a NumPy array. Because the columns mix text and
# integers, the result is a 2-D array with dtype=object (see the output).
np.array(df)

array([['Bhavesh', 26, 'Bengaluru'],
       ['Shivam', 26, 'Bengaluru'],
       ['Zeeshan', 26, 'Bengaluru']], dtype=object)

In [11]:
# Create a DataFrame from a list of dictionaries: each dictionary is one row
# and its keys become the column names.

data = [
    {'Name': 'Bhavesh', 'Age': 26, 'City': 'Bengaluru'}
    for _ in range(4)  # four identical records
]

df = pd.DataFrame(data)
print(df, type(df), sep="\n")

      Name  Age       City
0  Bhavesh   26  Bengaluru
1  Bhavesh   26  Bengaluru
2  Bhavesh   26  Bengaluru
3  Bhavesh   26  Bengaluru
<class 'pandas.DataFrame'>


In [None]:
# Read a CSV file
# Load it under its own name: the cells that follow keep using the small
# Name/Age/City example frame `df`, so rebinding `df` here would break them
# on a fresh Restart & Run All.
# NOTE(review): assumes 'sales_data.csv' sits next to the notebook - confirm.
sales_df = pd.read_csv('sales_data.csv')

# Only a cell's last expression is displayed, so print the head explicitly.
print(sales_df.head(5))

sales_df.tail(5)


In [None]:
# Accessing data from a DataFrame
# Display the whole frame first as a reference for the selections that follow.
df


Unnamed: 0,Name,Age,City
0,Bhavesh,26,Bengaluru
1,Bhavesh,26,Bengaluru
2,Bhavesh,26,Bengaluru
3,Bhavesh,26,Bengaluru


In [14]:
# Selecting a single column with [] yields a pandas Series. Only the last
# expression of a cell is displayed, so the type is checked directly here
# instead of leaving a bare df['Name'] whose value would be discarded.
type(df['Name'])


pandas.Series

In [15]:
# .loc selects by index label: the row labelled 0, returned as a Series
# whose index is the column names.
df.loc[0]

Name      Bhavesh
Age            26
City    Bengaluru
Name: 0, dtype: object

In [16]:
# .iloc selects by integer position: the first row (position 0). It matches
# df.loc[0] here because the index labels are the default 0..n-1.
df.iloc[0]

Name      Bhavesh
Age            26
City    Bengaluru
Name: 0, dtype: object

In [19]:
# Gotcha: chained indexing. df.iloc[0] is a Series indexed by the column
# names ('Name', 'Age', 'City'), so the following [1] is looked up as a
# *label*, which does not exist, and KeyError is raised (see the output).
# The error is caught and printed so this demonstration does not abort a
# Restart & Run All. For a positional scalar lookup, pass both positions to
# one indexer instead: df.iloc[0, 1] or df.iat[0, 1].
try:
    df.iloc[0][1]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 1

In [20]:
# Accessing a specific element
# Start from the whole 'Name' column (a Series).

df['Name']

0    Bhavesh
1    Bhavesh
2    Bhavesh
3    Bhavesh
Name: Name, dtype: str

In [21]:
# .at returns a single value addressed by row label and column label.
df.at[1,'Age']

np.int64(26)

In [22]:
# Same label-based scalar lookup, this time on the text column 'Name'.
df.at[2,'Name']

'Bhavesh'

In [23]:
# .iat is the positional counterpart of .at: row position 2, column
# position 2 (the third column, 'City').
df.iat[2,2]

'Bengaluru'

In [24]:
# Show the current frame before columns are added or removed.
df


Unnamed: 0,Name,Age,City
0,Bhavesh,26,Bengaluru
1,Bhavesh,26,Bengaluru
2,Bhavesh,26,Bengaluru
3,Bhavesh,26,Bengaluru


In [None]:
# Data Manipulation with Dataframe

In [30]:
# Gotcha: a new column needs exactly one value per row. df has 4 rows at
# this point, so a 3-item list is rejected with ValueError (see the output).
# The error is caught and printed so this demonstration does not abort a
# Restart & Run All; df is left unchanged.
try:
    df['Salary'] = [5000, 6000, 7000]
except ValueError as err:
    print(f"ValueError: {err}")


ValueError: Length of values (3) does not match length of index (4)

In [32]:
import pandas as pd

# Start over with a three-row frame so that three-item columns fit, then
# attach both new columns in one assign() call.
df = pd.DataFrame({'Name': list('ABC')}).assign(
    Salary=[5000, 6000, 7000],
    Salaryy=[5000, 6000, 7000],  # duplicate column, removed again further down
)
print(df)

  Name  Salary  Salaryy
0    A    5000     5000
1    B    6000     6000
2    C    7000     7000


In [36]:
# Return a new frame without the 'Salaryy' column; df itself is not modified.
df.drop(columns='Salaryy')

Unnamed: 0,Name,Salary
0,A,5000
1,B,6000
2,C,7000


In [37]:
# drop() without inplace=True returned a new frame, so df still contains 'Salaryy'.
df


Unnamed: 0,Name,Salary,Salaryy
0,A,5000,5000
1,B,6000,6000
2,C,7000,7000


In [39]:
# Permanent drop: with inplace=True the column is removed from df itself
# and nothing is returned.
df.drop(columns='Salaryy', inplace=True)

In [40]:
# Confirm that 'Salaryy' is no longer part of df after the in-place drop.
df


Unnamed: 0,Name,Salary
0,A,5000
1,B,6000
2,C,7000


In [41]:
# Raise every value in the Salary column by 5000.
# Note: re-running this cell adds another 5000 each time.
df['Salary'] = df['Salary'].add(5000)

In [42]:
# Every Salary value is now 5000 higher than before.
df


Unnamed: 0,Name,Salary
0,A,10000
1,B,11000
2,C,12000


In [43]:
# Return a new frame without the row labelled 0; df itself is not modified.
df.drop(index=0)

Unnamed: 0,Name,Salary
1,B,11000
2,C,12000


In [44]:
# drop(0) returned a new frame; the row labelled 0 is still present in df.
df

Unnamed: 0,Name,Salary
0,A,10000
1,B,11000
2,C,12000


In [45]:
# Remove the row labelled 0 from df itself (in place; nothing is returned).
df.drop(index=0, inplace=True)

In [46]:
# Row 0 has now been removed from df itself by the in-place drop.
df

Unnamed: 0,Name,Salary
1,B,11000
2,C,12000


In [48]:
# Print two reports about df, each under its own heading:
#   1. the data type of every column
#   2. the describe() statistical summary
for heading, report in (
    ("Data types:\n", df.dtypes),
    ("Statistical Summary:\n", df.describe()),
):
    print(heading, report)

Data types:
 Name        str
Salary    int64
dtype: object
Statistical Summary:
              Salary
count      2.000000
mean   11500.000000
std      707.106781
min    11000.000000
25%    11250.000000
50%    11500.000000
75%    11750.000000
max    12000.000000


In [49]:
# Same describe() summary as printed above, this time returned as the
# cell's displayed result.
df.describe()

Unnamed: 0,Salary
count,2.0
mean,11500.0
std,707.106781
min,11000.0
25%,11250.0
50%,11500.0
75%,11750.0
max,12000.0
