# Accessing Data from Loaded Files

In [1]:
import pandas as pd

In [None]:
# Load the tutorial data sets into DataFrames: two CSV files fetched over HTTP
# and one Parquet file read from the local ./data directory.
coffee = pd.read_csv(
    'https://raw.githubusercontent.com/KeithGalli/complete-pandas-tutorial/refs/heads/master/warmup-data/coffee.csv'
)
results = pd.read_parquet(
    './data/results.parquet'
)
bios = pd.read_csv(
    'https://raw.githubusercontent.com/KeithGalli/complete-pandas-tutorial/refs/heads/master/data/bios.csv'
)

In [5]:
# Display the coffee DataFrame: a bare name on the last line of a cell is
# rendered by the notebook as that cell's output.
coffee

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


In [None]:
# Row and column access patterns. Only the final expression in a cell is
# rendered; the earlier lines are evaluated and their results discarded.

# tail() with no argument returns the last 5 rows
coffee.tail()

# sample(n) returns n randomly chosen rows, so their order is not preserved
coffee.sample(n=7)

# Label-based selection: df.loc[row_labels, column_labels].
# Unlike Python list slicing, a .loc slice includes its stop label.
# (The integer labels below assume coffee still has its default 0..n-1 index.)
coffee.loc[0]                                # Row labelled 0, returned as a Series
coffee.loc[[0, 1, 2]]                        # Rows labelled 0, 1 and 2
coffee.loc[0:2]                              # Same three rows, written as a slice
coffee.loc[::-1]                             # Every row, in reverse order
coffee.loc[0::2]                             # Every second row, starting at label 0
coffee.loc[3:6, ["Day", "Units Sold"]]       # Rows 3 through 6, restricted to two columns

# Position-based selection: df.iloc[row_positions, column_positions].
# Both axes use integer positions, and slices exclude the stop position.
coffee.iloc[:, [0]]                          # All rows, column at position 0 (kept as a DataFrame)
coffee.iloc[3::-1, [1, 2]]                   # Positions 3 down to 0, columns at positions 1 and 2

# Selecting a single column as a Series
coffee["Day"]                                # Bracket access works for any column name
coffee.Day                                   # Attribute access needs a name that is a valid Python identifier

0        Monday
1        Monday
2       Tuesday
3       Tuesday
4     Wednesday
5     Wednesday
6      Thursday
7      Thursday
8        Friday
9        Friday
10     Saturday
11     Saturday
12       Sunday
13       Sunday
Name: Day, dtype: object

In [46]:
# Build a copy of the data indexed by day name instead of by integer label.
# set_index returns a new DataFrame, so `coffee` keeps its integer index and the
# integer-label lookups in other cells (e.g. coffee.loc[1, "Units Sold"]) keep
# working on a top-to-bottom run. drop=False keeps "Day" as a regular column too.
coffee_by_day = coffee.set_index("Day", drop=False)

# With day names as labels, a .loc slice runs from the first occurrence of the
# start label through the last occurrence of the stop label (inclusive).
coffee_by_day.loc["Monday":"Wednesday"]         # Every row from Monday through Wednesday
coffee_by_day.loc["Wednesday":, "Units Sold"]   # Wednesday onward, only the units sold

Day
Wednesday    35
Wednesday    25
Thursday     40
Thursday     30
Friday       45
Friday       35
Saturday     45
Saturday     35
Sunday       45
Sunday       35
Name: Units Sold, dtype: int64

In [None]:
# Edit values in the DataFrame in place.
# NOTE(review): the integer labels below assume coffee has its default integer
# index — confirm no earlier cell has replaced it with string labels.
coffee.loc[1, "Units Sold"] = 10        # Set "Units Sold" for the row labelled 1
coffee.loc[1:3, "Units Sold"] = 15      # Set rows labelled 1 through 3 (inclusive), overwriting the 10 above
print(coffee.at[0, "Coffee Type"])      # .at reads ONE value by row label and column label

coffee.head()                           # Show the first rows to check the edits

Espresso


Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,15
3,Tuesday,Latte,15
4,Wednesday,Espresso,35


In [15]:
# Sorting values. sort_values returns a new, sorted DataFrame and leaves coffee
# unchanged; only the last expression in the cell is displayed.
coffee.sort_values("Units Sold")                            # Low to high
coffee.sort_values("Units Sold", ascending=False)           # High to low
coffee.sort_values(["Units Sold", "Coffee Type"])           # By units sold, ties broken by coffee type
# Units sold descending, then coffee type ascending: one boolean per sort column
coffee.sort_values(["Units Sold", "Coffee Type"], ascending=[False, True])

Unnamed: 0,Day,Coffee Type,Units Sold
8,Friday,Espresso,45
10,Saturday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
7,Thursday,Latte,30
0,Monday,Espresso,25
