In [5]:
import pandas as pd

In [7]:
my_dict = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasila", "Moscow", "New Delhi", "Beijing", "Pretoria"],
    "area":    [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98]
}

In [9]:
brics = pd.DataFrame(my_dict)
brics.shape

(5, 4)

In [9]:
brics

Unnamed: 0,country,capital,area,population
0,Brazil,Brasila,8.516,200.4
1,Russia,Moscow,17.1,143.5
2,India,New Delhi,3.286,1252.0
3,China,Beijing,9.597,1357.0
4,South Africa,Pretoria,1.221,52.98


In [11]:
brics.index = ["BR", "RU", "IN", "CH", "SA"]
brics

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasila,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


### Advance data access

### Column Selection
#### 1). Series
A 1-dimensional labelled array. A bunch of series makes a Dataframe.

In [19]:
brics["country"]

0          Brazil
1          Russia
2           India
3           China
4    South Africa
Name: country, dtype: object

In [15]:
# Series : 
type(brics["country"])

pandas.core.series.Series

#### 2). Dataframe 
Creating a dataframe of fewer columns from a dataframe with more columns.

In [26]:
# To create a Dataframe of fewer columns from a bigger Dataframe you do the below
brics[["country"]]

Unnamed: 0,country
0,Brazil
1,Russia
2,India
3,China
4,South Africa


In [30]:
type(brics[['country']])

pandas.core.frame.DataFrame

In [32]:
# You can select as much columns as you wish, which is not possible for series, because a series is 1-D.
brics[["country", "population"]]

Unnamed: 0,country,population
0,Brazil,200.4
1,Russia,143.5
2,India,1252.0
3,China,1357.0
4,South Africa,52.98


#### Row Selection (selection by index)
Row selection can be achieved by:
1). Normal python slicing operation. 

In [40]:
brics[1:3]

Unnamed: 0,country,capital,area,population
1,Russia,Moscow,17.1,143.5
2,India,New Delhi,3.286,1252.0


### pandas loc and iloc methods.
To extend our operation, to enable us to make the selection for both columns and rows simultaneously we use the pandas **loc** and **iloc** methods.

#### loc- label-based


In [49]:
brics.loc[1]

country       Russia
capital       Moscow
area            17.1
population     143.5
Name: 1, dtype: object

In [51]:
# To view the information in a Dataframe format
brics.loc[[1]]

Unnamed: 0,country,capital,area,population
1,Russia,Moscow,17.1,143.5


In [55]:
# Selecting rows at random
brics.loc[[0, 2, 4]]

Unnamed: 0,country,capital,area,population
0,Brazil,Brasila,8.516,200.4
2,India,New Delhi,3.286,1252.0
4,South Africa,Pretoria,1.221,52.98


In [67]:
# Selecting rows and columns. This is allowed brics.loc[0:3, ["country", "population"]], but 
# this  is only allowed becuase the index is an integer type brics.loc[[0, 2,4], ["country", "population"]]
brics.loc[0:3, ["country", "population"]]

Unnamed: 0,country,population
0,Brazil,200.4
1,Russia,143.5
2,India,1252.0
3,China,1357.0


In [65]:
brics.loc[[0, 2,4], ["country", "population"]]

Unnamed: 0,country,population
0,Brazil,200.4
2,India,1252.0
4,South Africa,52.98


#### iloc- integer position-based
It is used to access data in both rows and columns with their respective integer index values. 
i.e **brics.iloc[[0, 2,4], ["country", "population"]]** will not work, but **brics.iloc[[0, 2,4], [0, 3]]**

In [74]:
brics.iloc[[0, 2,4], [0, 3]]

Unnamed: 0,country,population
0,Brazil,200.4
2,India,1252.0
4,South Africa,52.98


### Reading Data from CSV files

In [81]:
from pathlib import Path

# Get the path of the current script (test_script.py)
current_dir = Path.cwd()
data_path = current_dir / 'data' / 'AB_NYC_2019.csv'

# Print the absolute path for verification (optional)
print(f"Image path: {data_path.absolute()}")

Image path: C:\Users\endie\Documents\codes\python\practice\data-fundamentals\data\AB_NYC_2019.csv


In [89]:
pd_data = pd.read_csv(data_path, index_col='id')
pd_data.shape

(48895, 15)

In [91]:
pd_data.head()

Unnamed: 0_level_0,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0


In [93]:
pd_data_price_greater_than_200 = pd_data[pd_data['price'] > 200 ]
print("shape: ", pd_data_price_greater_than_200.shape)
pd_data_price_greater_than_200.head()

shape:  (8384, 15)


Unnamed: 0_level_0,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
7097,Perfect for Your Parents + Garden,17571,Jane,Brooklyn,Fort Greene,40.69169,-73.97185,Entire home/apt,215,2,198,2019-06-28,1.72,1,321
7801,Sweet and Spacious Brooklyn Loft,21207,Chaya,Brooklyn,Williamsburg,40.71842,-73.95718,Entire home/apt,299,3,9,2011-12-28,0.07,1,0
14290,* ORIGINAL BROOKLYN LOFT *,56104,James,Brooklyn,Williamsburg,40.7042,-73.9356,Entire home/apt,228,3,82,2019-05-17,0.7,1,140
15396,Sunny & Spacious Chelsea Apartment,60278,Petra,Manhattan,Chelsea,40.74623,-73.9953,Entire home/apt,375,180,5,2018-11-03,0.12,1,180
