# Load Data into Pandas DataFrame
See the [10 Minutes to pandas](http://pandas.pydata.org/pandas-docs/stable/10min.html) quickstart guide to get started.

In [None]:
%matplotlib inline
import os

import matplotlib.pyplot as plt
import pandas as pd

Load the World Food Facts data to start working with pandas. See the README file to choose columns of interest. Avoid columns 0, 3, 5, 27, and 36.

In [None]:
# Path to the CSV, relative to the notebook's working directory.
# (`os` is imported in the import cell at the top of the notebook.)
fname = os.path.join("world-food-facts", "FoodFacts.csv")

# Load only a subset of columns, selected by their position in the CSV:
# positions 7-22 and 40-157. These ranges leave out columns 0, 3, 5, 27, and 36.
columns_to_load = list(range(7, 23)) + list(range(40, 158))
foodfacts = pd.read_csv(fname, usecols=columns_to_load)

# Check that the data is loaded properly

In [None]:
# view the dimensions of the dataframe as a (number of rows, number of columns) tuple
foodfacts.shape

In [None]:
# show the first 10 rows
foodfacts.head(n=10)

In [None]:
# show the last 10 rows
foodfacts.tail(n=10)

In [None]:
# show summary statistics (count, mean, std, min, quartiles, max);
# by default describe() covers the numeric columns of a mixed-type frame
foodfacts.describe()

# Select rows and columns

In [None]:
# return a slice of the rows by integer position:
# positions 80 through 85 (the end of the slice, 86, is excluded)
foodfacts.iloc[80:86]

In [None]:
# return one row by integer position (not by index label);
# a single row comes back as a Series
foodfacts.iloc[83]

In [None]:
# pick specific rows and columns by their integer positions
row_positions = [83, 85]
column_positions = [0, 2]
foodfacts.iloc[row_positions, column_positions]

In [None]:
# select a single column via attribute access
# (only possible when the column name is a valid Python identifier)
foodfacts.product_name
# or, equivalently, via bracket indexing, which works for any column name;
# the notebook displays only this last expression
foodfacts["product_name"]

In [None]:
# build a new data frame containing only the listed columns
selected_columns = ["product_name", "categories", "serving_size", "energy_100g"]
foodfacts[selected_columns]

In [None]:
# select named columns again, this time binding the result to a name
# so the returned data frame can be reused in later cells
energy_columns = [
    "product_name",
    "categories",
    "serving_size",
    "energy_100g",
    "energy_from_fat_100g",
]
foodfacts_energy = foodfacts[energy_columns]

In [None]:
# return a data frame with columns chosen by integer position.
# Plain `foodfacts[[1, 2, 8]]` looks for columns *labelled* 1, 2 and 8 and raises
# a KeyError because the columns have string names, so use .iloc instead.
# Note: these positions count within the loaded data frame and do not match the
# column numbers of the original CSV, since only a subset of columns was read.
foodfacts.iloc[:, [1, 2, 8]]

In [None]:
# keep only the rows whose energy_100g value is greater than zero
foodfacts_energy.loc[foodfacts_energy["energy_100g"] > 0]

In [None]:
# keep only the rows where both conditions hold; the two boolean masks
# are combined element-wise with &
positive_energy = foodfacts_energy["energy_100g"] > 0
positive_fat_energy = foodfacts_energy["energy_from_fat_100g"] > 0
foodfacts_energy[positive_energy & positive_fat_energy]

In [None]:
# order the rows by energy_100g in descending order (largest value first)
foodfacts_energy.sort_values(by="energy_100g", ascending=False)

# Graph the data

In [None]:
# scatter plot of additives_n (x) against ingredients_that_may_be_from_palm_oil_n (y),
# configured through the Axes object that DataFrame.plot returns
ax = foodfacts.plot(
    x="additives_n",
    y="ingredients_that_may_be_from_palm_oil_n",
    kind="scatter",
)
ax.set_xlabel("additives")
ax.set_ylabel("ingredients that may be from palm oil")
# assigning the returned limits keeps them from being echoed as cell output
xlim = ax.set_xlim(0, 26)
ylim = ax.set_ylim(0, 7)