# Loading Data in pandas

A CSV version of a dataset can be exported from an Excel spreadsheet, a SQL database, or a Google Sheet.

### Loading a DataFrame

In [113]:
import pandas as pd
# Read the credit-card records; index_col=0 makes the CSV's first column the row index.
credit_records = pd.read_csv(
    "credit_records.csv",
    index_col=0,
)

# Preview the first five rows to check that the file loaded as expected.
print(credit_records.head(n=5))

            suspect         location             date         item  price
0    Kirstine Smith   Groceries R Us   January 6 2018     broccoli   1.25
1      Gertrude Cox  Petroleum Plaza   January 6 2018  fizzy drink   1.90
2  Fred Frequentist   Groceries R Us   January 6 2018     broccoli   1.25
3      Gertrude Cox   Groceries R Us  January 12 2018     broccoli   1.25
4    Kirstine Smith    Clothing Club   January 9 2018        shirt  14.25


### Inspecting a DataFrame

In [60]:
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
credit_records.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 104 entries, 0 to 103
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   suspect   104 non-null    object 
 1   location  104 non-null    object 
 2   date      104 non-null    object 
 3   item      104 non-null    object 
 4   price     104 non-null    float64
dtypes: float64(1), object(4)
memory usage: 4.9+ KB
None


## Selecting columns

In [61]:
# Total of the "price" column across all rows (bracket selection of the column).
credit_records["price"].sum()

908.9000000000001

### Two methods for selecting columns

In [78]:
# Two equivalent ways to select one column as a Series:
#   1. bracket notation with the column label as a string
#   2. dot (attribute) notation
# Both are printed in turn so the identical results can be compared.
for items in (credit_records["item"], credit_records.item):
    print(items)

0         broccoli
1      fizzy drink
2         broccoli
3         broccoli
4            shirt
          ...     
99           shirt
100          pants
101          dress
102         burger
103      cucumbers
Name: item, Length: 104, dtype: object
0         broccoli
1      fizzy drink
2         broccoli
3         broccoli
4            shirt
          ...     
99           shirt
100          pants
101          dress
102         burger
103      cucumbers
Name: item, Length: 104, dtype: object


### More column selection mistakes

In [82]:
# Load mpr.csv, using the CSV's first column as the row index.
mpr = pd.read_csv("mpr.csv", index_col=0)

# info() prints its own report and returns None, so it is not wrapped in print()
# (that would add a stray "None" line to the output).
mpr.info()

# "Dog Name" contains a space, so dot notation cannot reach it; bracket notation
# works for every column label.
name = mpr["Dog Name"]
is_missing = mpr["Status"]

print(name)
print(is_missing)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6 entries, 0 to 5
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Dog Name    6 non-null      object
 1   Owner Name  5 non-null      object
 2   Dog Breed   6 non-null      object
 3   Status      6 non-null      object
 4   Age         6 non-null      int64 
dtypes: int64(1), object(4)
memory usage: 288.0+ bytes
None
0      Bayes
1    Sigmoid
2     Sparky
3    Theorem
4        Ned
5      Benny
Name: Dog Name, dtype: object
0    Still Missing
1    Still Missing
2            Found
3            Found
4    Still Missing
5            Found
Name: Status, dtype: object


## Selecting rows with logic

### Logical testing

In [83]:
# Sample values to compare against.
height_inches, plate1, fur_color = 65, "FRQ123", "blonde"

# Each comparison evaluates to a bool; print one result per line.
print(
    height_inches > 70,      # greater-than test
    plate1 == "FRQ123",      # equality test
    fur_color != "brown",    # inequality test
    sep="\n",
)

False
True
True


### Selecting missing puppies

In [84]:
# Bare last expression: the notebook renders the whole frame as this cell's
# output (mpr has only 6 rows, so showing it in full is fine).
mpr


Unnamed: 0,Dog Name,Owner Name,Dog Breed,Status,Age
0,Bayes,DataCamp,Golden Retriever,Still Missing,1
1,Sigmoid,,Dachshund,Still Missing,2
2,Sparky,Dr. Apache,Border Collie,Found,3
3,Theorem,Joseph-Louis Lagrange,French Bulldog,Found,4
4,Ned,Tim Oliphant,Shih Tzu,Still Missing,2
5,Benny,Hillary Green-Lerman,Poodle,Found,3


In [88]:
# Each filter builds a boolean mask from a column comparison and keeps only the
# rows where the mask is True.

# Rows whose Age is greater than 2.
greater_than_2 = mpr[mpr.Age > 2]
print(greater_than_2)

# Rows whose Status is exactly "Still Missing".
# sep="" stops print() from putting a space between the newline and the table,
# which would push the header row out of line with the data rows.
still_missing = mpr[mpr.Status == "Still Missing"]
print("\n", still_missing, sep="")

# Rows whose breed is anything other than "Poodle"; the column label contains a
# space, so it is selected with bracket notation.
not_poodle = mpr[mpr["Dog Breed"] != "Poodle"]
print("\n", not_poodle, sep="")

  Dog Name             Owner Name       Dog Breed Status  Age
2   Sparky             Dr. Apache   Border Collie  Found    3
3  Theorem  Joseph-Louis Lagrange  French Bulldog  Found    4
5    Benny   Hillary Green-Lerman          Poodle  Found    3

   Dog Name    Owner Name         Dog Breed         Status  Age
0    Bayes      DataCamp  Golden Retriever  Still Missing    1
1  Sigmoid           NaN         Dachshund  Still Missing    2
4      Ned  Tim Oliphant          Shih Tzu  Still Missing    2

   Dog Name             Owner Name         Dog Breed         Status  Age
0    Bayes               DataCamp  Golden Retriever  Still Missing    1
1  Sigmoid                    NaN         Dachshund  Still Missing    2
2   Sparky             Dr. Apache     Border Collie          Found    3
3  Theorem  Joseph-Louis Lagrange    French Bulldog          Found    4
4      Ned           Tim Oliphant          Shih Tzu  Still Missing    2


### Narrowing the list of suspects

In [162]:
# Keep only the purchases whose location is "Pet Paradise".
purchase = credit_records[credit_records["location"] == "Pet Paradise"]

# Sort by suspect name so each suspect's purchases appear together.
print(purchase.sort_values(by="suspect"))

              suspect      location             date          item  price
2    Fred Frequentist  Pet Paradise  January 14 2018    dog treats   8.75
100  Fred Frequentist  Pet Paradise  January 14 2018    dog collar  12.25
1        Gertrude Cox  Pet Paradise  January 13 2018  dog chew toy   5.95
3        Gertrude Cox  Pet Paradise  January 13 2018    dog treats   8.75
