<i>Series</i>

In [None]:
# A Series is a one-dimensional, array-like structure provided by the pandas library in Python. It stores and manipulates a sequence of data points, which can be of various data types such as integers, floats, strings, or even arbitrary Python objects. Each element in a Series is associated with an index label, which allows for easy access to and manipulation of the data.

In [6]:
import numpy as np 
import pandas as pd

In [7]:
# Shared sample inputs for the Series examples: the same three values held as
# index labels, a plain list, a NumPy array, and a label -> value dict.
labels = list('abc')
my_data = [10, 20, 30]
arr = np.array(my_data)
d = dict(a=10, b=20, c=30)

In [8]:
# Series from a plain list: with no index given, pandas assigns integer labels 0..n-1.
pd.Series(data=my_data)

0    10
1    20
2    30
dtype: int64

In [9]:
# Same values, but with explicit index labels taken from `labels`.
pd.Series(my_data, index=labels)

a    10
b    20
c    30
dtype: int64

In [10]:
# Series from a NumPy array: behaves like the list case and gets a default integer index.
pd.Series(data=arr)

0    10
1    20
2    30
dtype: int64

In [11]:
# Series from a dict: the keys become the index labels and the values become the data.
pd.Series(data=d)

a    10
b    20
c    30
dtype: int64

<h3>DataFrame</h3>

In [16]:
# Column-oriented input: every key becomes a column label and every list holds
# that column's values (all lists have the same length, one entry per row).
data = dict(
    Name=['John', 'Anna', 'Peter', 'Linda'],
    Age=[28, 34, 29, 42],
    City=['New York', 'Paris', 'Berlin', 'London'],
    Salary=[65000, 70000, 62000, 85000],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [18]:
# Row-oriented input: each inner list is one record (name, age, city, salary).
data_list = [
    ['John', 28, 'New York', 65000],
    ['Anna', 34, 'Paris', 70000],
    ['Peter', 29, 'Berlin', 62000],
    ['Linda', 42, 'London', 85000],
]
# No column names are supplied here, so pandas labels the columns 0, 1, 2, 3.
df2 = pd.DataFrame(data=data_list)
df2

Unnamed: 0,0,1,2,3
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [21]:
# Rebuild df2 from the same rows, this time naming the columns explicitly.
columns = [
    'Name',
    'Age',
    'City',
    'Salary',
]
df2 = pd.DataFrame(data=data_list, columns=columns)
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


<i>  Selection and indexing of columns </i>


In [22]:
# Display df2 as a reference point before selecting columns from it.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [23]:
# Selecting a single column label returns that column as a Series.
df2.loc[:, 'Name']

0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object

In [24]:
# Selecting with a list of column labels returns a DataFrame holding just those columns.
df2.loc[:, ['Name', 'City']]

Unnamed: 0,Name,City
0,John,New York
1,Anna,Paris
2,Peter,Berlin
3,Linda,London


<i>Creating a new column</i>

In [34]:
# Assigning a list to a new column label adds that column to df2
# (one value per existing row, in row order).
designations = ['Manager', 'Analyst', 'Clerk', 'Director']
df2['Designation'] = designations
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Manager
1,Anna,34,Paris,70000,Analyst
2,Peter,29,Berlin,62000,Clerk
3,Linda,42,London,85000,Director


<i>Removing a column</i>

In [None]:
# Remove the 'Designation' column. drop() returns a new DataFrame, so the result is
# assigned back to df2 rather than mutating it with inplace=True.
# errors='ignore' keeps the cell safe to re-run once the column is already gone
# (a second run would otherwise raise KeyError).
df2 = df2.drop(columns=['Designation'], errors='ignore')
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [36]:
# Drop the row whose index label is 0. This returns a new DataFrame;
# df2 itself is left unchanged because the result is not assigned back.
df2.drop(index=0)

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [None]:
# Label-based access: fetch the row whose index label is 0 (returned as a Series).
df2.loc[0, :]

Name          John
Age             28
City      New York
Salary       65000
Name: 0, dtype: object

In [39]:
# Display df2 again: row 0 is still present because the earlier drop(0, axis=0)
# result was never assigned back.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [41]:
# Passing a list of index labels to .loc returns those rows as a DataFrame.
df2.loc[[1, 2], :]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000


In [None]:
# Select rows 0 and 1 and the Name/Salary columns in a single .loc call
# (row labels first, then column labels). This avoids chained indexing, which
# builds an intermediate DataFrame and is unsafe if later used for assignment.
df2.loc[[0, 1], ['Name', 'Salary']]

Unnamed: 0,Name,Salary
0,John,65000
1,Anna,70000


<i>Conditional Selection</i>

In [43]:
# Display df2 as a reference point before filtering rows by condition.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [44]:
# Select only the people whose age is greater than 30.
# The comparison yields a boolean mask (one True/False per row); .loc keeps the True rows.
df2.loc[df2['Age'] > 30]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
3,Linda,42,London,85000


In [45]:
# Select only the people whose age is greater than 30 and whose city is Paris.
# The two masks are combined with & (element-wise AND); each comparison needs its own
# parentheses because & binds more tightly than > and ==.
df2.loc[(df2['City'] == 'Paris') & (df2['Age'] > 30)]


Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
