# Selection and Slicing (Fundamentals)

To select data with `.loc[]`, we first need to give our data labels (an index).

In [2]:
import pandas as pd

### Pandas Series

In [77]:
# A pandas Series behaves like a single column of a table:
# a one-dimensional array that can hold data of any type.

a = [3, 4, 5, 6, 6, 7, 3]
ser = pd.Series(data=a)  # no index given, so rows are labeled 0..len(a)-1

print(ser)

0    3
1    4
2    5
3    6
4    6
5    7
6    3
dtype: int64


In [78]:
print(ser.tolist()) # tolist() returns the values of the Series as a plain Python list
# (only the values, not the index), which can be easier to read at a glance

[3, 4, 5, 6, 6, 7, 3]


In [79]:
# Rebuild the Series with explicit string labels (one per value) instead of 0..6
ser = pd.Series(data=a, index=["x", "y", "z", "a", "b", "c", "d"])
print(ser)

x    3
y    4
z    5
a    6
b    6
c    7
d    3
dtype: int64


### Pandas DataFrame

In [75]:
# Column-oriented input: each key becomes a column, each list holds that column's values
myData = {
    "name": ["Alice", "Bob", "Charlie", "David", "Emily"],
    "age": [25, 30, 35, 40, 45],
    "city": ["New York", "Paris", "London", "San Francisco", "Tokyo"],
    "Nation": ["USA", "France", "UK", "USA", "Japan"],
}

# Build the DataFrame from the dict; rows get the default integer index 0..4
df = pd.DataFrame(data=myData)
print(df)

      name  age           city  Nation
0    Alice   25       New York     USA
1      Bob   30          Paris  France
2  Charlie   35         London      UK
3    David   40  San Francisco     USA
4    Emily   45          Tokyo   Japan


In [90]:
# Replace the default integer index (0..4) with string row labels, one per row.
# This modifies df in place, so every later cell sees the labeled rows.
df.index = ['x', 'y', 'z', 'a', 'b']

In [91]:
# Display the first 2 rows (observations) of the DataFrame
df.head(2)

Unnamed: 0,name,age,city,Nation
x,Alice,25,New York,USA
y,Bob,30,Paris,France


In [92]:
# Display the last 2 rows (observations) of the DataFrame
df.tail(2)

Unnamed: 0,name,age,city,Nation
a,David,40,San Francisco,USA
b,Emily,45,Tokyo,Japan


In [93]:
df.describe() # summary statistics (count, mean, std, min, quartiles, max); here only the numeric 'age' column is summarized

Unnamed: 0,age
count,5.0
mean,35.0
std,7.905694
min,25.0
25%,30.0
50%,35.0
75%,40.0
max,45.0


In [94]:
df.info() # prints the index range, each column's non-null count and dtype, and the memory usage

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, x to b
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    5 non-null      object
 1   age     5 non-null      int64 
 2   city    5 non-null      object
 3   Nation  5 non-null      object
dtypes: int64(1), object(3)
memory usage: 200.0+ bytes


# loc [Label Based Indexing]
- The `.loc[]` indexer lets you select data from a DataFrame by label rather than by integer position.
- Use `.loc[]` to return one or more specified row(s)/column(s), or individual elements.

### Selections by Labels

In [96]:
# Select the entire row labeled "y"; ':' means "all columns".
# A single row label returns a Series whose index is the column names.
df.loc['y',:]

name         Bob
age           30
city       Paris
Nation    France
Name: y, dtype: object

In [85]:
# Select the entire column labeled "city"; ':' means "all rows".
# A single column label returns a Series.
df.loc[:,'city']

x         New York
y            Paris
z           London
a    San Francisco
b            Tokyo
Name: city, dtype: object

In [97]:
# Select the entire rows labeled "y" and "b".
# Passing a list of labels returns a DataFrame, in the order the labels are listed.
df.loc[['y','b'],:]

Unnamed: 0,name,age,city,Nation
y,Bob,30,Paris,France
b,Emily,45,Tokyo,Japan


In [98]:
# Select the entire columns labeled "name" and "city".
# Passing a list of labels returns a DataFrame.
df.loc[:,['name','city']]

Unnamed: 0,name,city
x,Alice,New York
y,Bob,Paris
z,Charlie,London
a,David,San Francisco
b,Emily,Tokyo


#### Note: an additional pair of `[ ]` was used inside `.loc[]` above — passing a list of labels returns a pandas DataFrame, whereas a single label returns a Series

In [101]:
# Select a single element by giving one row label and one column label

df.loc['z','name'] # the value at row labeled 'z', column labeled 'name'

'Charlie'

### Slicing by Labels

In [100]:
# Slice rows labeled 'x' through 'z' and columns labeled 'name' through 'city'.
# Unlike positional slicing, a label slice includes BOTH endpoints ('z' and 'city' are returned).
df.loc['x':'z', 'name':'city']

Unnamed: 0,name,age,city
x,Alice,25,New York
y,Bob,30,Paris
z,Charlie,35,London


### Selection by Boolean Indexing

In [102]:
# df['age']>35 builds a boolean mask (one True/False per row);
# .loc keeps only the rows where the mask is True
df.loc[df['age']>35]

Unnamed: 0,name,age,city,Nation
a,David,40,San Francisco,USA
b,Emily,45,Tokyo,Japan


### Assignment of new values

In [103]:
# Overwrite a single cell: set 'age' to 36 in the row labeled 'a' (modifies df in place)
df.loc['a', 'age'] = 36

# Multi-Indexing
- Multi-indexing, also known as hierarchical indexing, is a pandas feature that lets you use multiple levels of indexing on a single axis.
- It enables you to work with high-dimensional data in a more intuitive way and to perform advanced data manipulation and analysis.



In [109]:
# Use the 'name' and 'age' columns as a two-level MultiIndex for the rows.
# set_index returns a new DataFrame, so rebind df instead of using inplace=True;
# the previous row labels ('x', 'y', ...) are replaced by the new index.
df = df.set_index(['name', 'age'])

In [111]:
# Display the DataFrame: 'name' and 'age' now form the row MultiIndex,
# leaving 'city' and 'Nation' as the remaining columns
df

Unnamed: 0_level_0,Unnamed: 1_level_0,city,Nation
name,age,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,25,New York,USA
Bob,30,Paris,France
Charlie,35,London,UK
David,36,San Francisco,USA
Emily,45,Tokyo,Japan
