# Pandas: DataFrame and Series
* Pandas is a powerful data manipulation library in Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional array-like object, while a DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [1]:
import pandas as pd

In [3]:
# Series
# A pandas Series is a one-dimensional, array-like object that can hold any data type;
# it is comparable to a single column of a table.
data = pd.Series(data=[1, 2, 3, 4])
print(data, type(data))

0    1
1    2
2    3
3    4
dtype: int64 <class 'pandas.core.series.Series'>


In [4]:
# Build a Series from a dictionary: the keys become the index labels,
# the values become the data.
data = dict(a=1, b=2, c=3)
series_dict = pd.Series(data=data)
print(series_dict)

a    1
b    2
c    3
dtype: int64


In [5]:
# Build a Series from a list of values, supplying the index labels explicitly.
data = [10, 20, 30]
index = list('abc')
pd.Series(data=data, index=index)

a    10
b    20
c    30
dtype: int64

In [6]:
# DataFrame
# Build a DataFrame from a dictionary of lists: each key is a column name
# and each list holds that column's values, one per row.
data = {
    'Name': ["Vinay", "Babu", "Gorantla"],
    'Age': [31, 30, 28],
    'Country': ["United Kingdom", "India", "America"],
}

df = pd.DataFrame(data=data)
print(df, type(df))

       Name  Age         Country
0     Vinay   31  United Kingdom
1      Babu   30           India
2  Gorantla   28         America <class 'pandas.core.frame.DataFrame'>


In [13]:
# Build a DataFrame from a list of dictionaries: each dictionary is one row,
# and its keys supply the column names.
data = [
    {'Name': 'Vinay', 'Age': 31, 'Country': 'United Kingdom'},
    {'Name': 'Babu', 'Age': 24, 'Country': 'India'},
    {'Name': 'Gorantla', 'Age': 27, 'Country': 'America'},
]
df = pd.DataFrame(data=data)
# One print call with a newline separator emits the frame and then its type on separate lines.
print(df, type(df), sep='\n')

       Name  Age         Country
0     Vinay   31  United Kingdom
1      Babu   24           India
2  Gorantla   27         America
<class 'pandas.core.frame.DataFrame'>


In [15]:
# Load the sales data from a CSV file (path is relative to the notebook's working directory)
# and preview the first five rows.
sales_df = pd.read_csv(filepath_or_buffer='sales_data.csv')
sales_df.head(n=5)

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001,2024-01-01,Electronics,iPhone 14 Pro,2,999.99,1999.98,North America,Credit Card
1,10002,2024-01-02,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,PayPal
2,10003,2024-01-03,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,Debit Card
3,10004,2024-01-04,Books,The Da Vinci Code,4,15.99,63.96,North America,Credit Card
4,10005,2024-01-05,Beauty Products,Neutrogena Skincare Set,1,89.99,89.99,Europe,PayPal


In [16]:
# Show the last ten rows of the sales data.
sales_df.tail(n=10)

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
230,10231,2024-08-18,Clothing,Adidas Originals Trefoil Hoodie,4,64.99,259.96,Asia,Debit Card
231,10232,2024-08-19,Books,Dune by Frank Herbert,2,9.99,19.98,North America,Credit Card
232,10233,2024-08-20,Beauty Products,Fresh Sugar Lip Treatment,1,24.0,24.0,Europe,PayPal
233,10234,2024-08-21,Sports,Hydro Flask Standard Mouth Water Bottle,3,32.95,98.85,Asia,Credit Card
234,10235,2024-08-22,Electronics,Bose QuietComfort 35 II Wireless Headphones,1,299.0,299.0,North America,Credit Card
235,10236,2024-08-23,Home Appliances,Nespresso Vertuo Next Coffee and Espresso Maker,1,159.99,159.99,Europe,PayPal
236,10237,2024-08-24,Clothing,Nike Air Force 1 Sneakers,3,90.0,270.0,Asia,Debit Card
237,10238,2024-08-25,Books,The Handmaid's Tale by Margaret Atwood,3,10.99,32.97,North America,Credit Card
238,10239,2024-08-26,Beauty Products,Sunday Riley Luna Sleeping Night Oil,1,55.0,55.0,Europe,PayPal
239,10240,2024-08-27,Sports,Yeti Rambler 20 oz Tumbler,2,29.99,59.98,Asia,Credit Card


In [17]:
# Select the single column 'Region' (all rows); the result is a Series.
sales_df.loc[:, 'Region']

0      North America
1             Europe
2               Asia
3      North America
4             Europe
           ...      
235           Europe
236             Asia
237    North America
238           Europe
239             Asia
Name: Region, Length: 240, dtype: object

In [18]:
# Preview up to the first five rows of the small example frame.
df.head(n=5)

Unnamed: 0,Name,Age,Country
0,Vinay,31,United Kingdom
1,Babu,24,India
2,Gorantla,27,America


In [20]:
# Label-based access: the row whose index label is 0, all columns.
df.loc[0, :]

Name                Vinay
Age                    31
Country    United Kingdom
Name: 0, dtype: object

In [21]:
# Label-based access: the row whose index label is 1, all columns.
df.loc[1, :]

Name        Babu
Age           24
Country    India
Name: 1, dtype: object

In [22]:
# Position-based access: the first row (integer position 0), all columns.
df.iloc[0, :]

Name                Vinay
Age                    31
Country    United Kingdom
Name: 0, dtype: object

In [24]:
# Add a new 'Salary' column; the list must supply one value per existing row.
df['Salary'] = [10_000, 60_000, 100_000]
df.head(n=5)

Unnamed: 0,Name,Age,Country,Salary
0,Vinay,31,United Kingdom,10000
1,Babu,24,India,60000
2,Gorantla,27,America,100000


In [26]:
# Remove the 'Salary' column.
# The result is rebound to `df` instead of using inplace=True, and errors='ignore'
# is passed so that re-running this cell after the column is already gone is a
# no-op rather than a KeyError.
df = df.drop(columns=['Salary'], errors='ignore')
df.head()

Unnamed: 0,Name,Age,Country
0,Vinay,31,United Kingdom
1,Babu,24,India
2,Gorantla,27,America


In [27]:
# Increase every value in the 'Age' column by one.
# NOTE(review): each execution of this cell adds another year, so re-running it
# changes the result.
df['Age'] = df['Age'].add(1)
df.head(n=5)

Unnamed: 0,Name,Age,Country
0,Vinay,32,United Kingdom
1,Babu,25,India
2,Gorantla,28,America


In [28]:
# Remove the row whose index label is 0.
# The result is rebound to `df` instead of using inplace=True, and errors='ignore'
# is passed so that re-running this cell after the row is already gone is a
# no-op rather than a KeyError.
df = df.drop(index=0, errors='ignore')
df.head()

Unnamed: 0,Name,Age,Country
1,Babu,25,India
2,Gorantla,28,America


In [29]:
# Summary statistics (count, mean, std, min, quartiles, max) for the sales data;
# with default arguments the output below covers the numeric columns.
sales_df.describe()

Unnamed: 0,Transaction ID,Units Sold,Unit Price,Total Revenue
count,240.0,240.0,240.0,240.0
mean,10120.5,2.158333,236.395583,335.699375
std,69.42622,1.322454,429.446695,485.804469
min,10001.0,1.0,6.5,6.5
25%,10060.75,1.0,29.5,62.965
50%,10120.5,2.0,89.99,179.97
75%,10180.25,3.0,249.99,399.225
max,10240.0,10.0,3899.99,3899.99


In [30]:
# Summary of the object-dtype (text) columns: count, number of unique values,
# most frequent value, and its frequency.
sales_df.describe(include=['object'])

Unnamed: 0,Date,Product Category,Product Name,Region,Payment Method
count,240,240,240,240,240
unique,240,6,232,3,3
top,2024-01-01,Electronics,Dyson Supersonic Hair Dryer,North America,Credit Card
freq,1,40,2,80,120
