# Fire Up GraphLab Create

We always start with this line before using any part of GraphLab Create. It can take up to 30 seconds to load the GraphLab library - be patient!

The first time you use GraphLab Create, you must enter a product key to license the software for non-commercial academic use. To register for a free one-year academic license and obtain your key, go to [dato.com](https://dato.com/download/academic.html).

In [1]:
# Ignore To Use Pandas
# import graphlab
# # Set product key on this computer. After running this cell, you will not need to re-enter your product key. 
# graphlab.product_key.set_product_key('your product key here')

# # Limit number of worker processes. This preserves system memory, which prevents hosted notebooks from crashing.
# graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)

# # Output active product key.
# graphlab.product_key.get_product_key()

# Pandas
import pandas as pd

# Load Tabular Data Set

In [2]:
# Original GraphLab Create call, kept for reference only:
#sf = graphlab.SFrame('people-example.csv')

# Pandas
# Load the CSV file into a DataFrame. The name `sf` is kept from the
# SFrame version of this notebook; the path is relative to the notebook's
# working directory.
sf = pd.read_csv('people-example.csv')

# SFrame (DataFrame) Basics

In [3]:
sf # display the table; a frame this small is shown in full

Unnamed: 0,First Name,Last Name,Country,age
0,Bob,Smith,United States,24
1,Alice,Williams,Canada,23
2,Malcolm,Jone,England,22
3,Felix,Brown,USA,23
4,Alex,Cooper,Poland,23
5,Tod,Campbell,United States,22
6,Derek,Ward,Switzerland,25


In [4]:
sf.tail()  # view end of the table

Unnamed: 0,First Name,Last Name,Country,age
2,Malcolm,Jone,England,22
3,Felix,Brown,USA,23
4,Alex,Cooper,Poland,23
5,Tod,Campbell,United States,22
6,Derek,Ward,Switzerland,25


# GraphLab Canvas

In [None]:
# .show() visualizes any data structure in GraphLab Create
# If you want Canvas visualization to show up on this notebook, 
# add this line:
#graphlab.canvas.set_target('ipynb')

# Pandas have no canvas, use matplotlib?

In [None]:
#sf['age'].show(view='Categorical')

# Inspect Dataset Column

In [5]:
sf['Country']

0    United States
1           Canada
2          England
3              USA
4           Poland
5    United States
6      Switzerland
Name: Country, dtype: object

In [6]:
sf['age']

0    24
1    23
2    22
3    23
4    23
5    22
6    25
Name: age, dtype: int64

Some simple columnar operations

In [7]:
# Find the mean of the age column
sf['age'].mean()

23.142857142857142

In [8]:
# Find the max of the age column
sf['age'].max()

25

# Create New Columns For SFrame (DataFrame)

In [9]:
sf

Unnamed: 0,First Name,Last Name,Country,age
0,Bob,Smith,United States,24
1,Alice,Williams,Canada,23
2,Malcolm,Jone,England,22
3,Felix,Brown,USA,23
4,Alex,Cooper,Poland,23
5,Tod,Campbell,United States,22
6,Derek,Ward,Switzerland,25


In [10]:
# Create a new column "Full Name" by concatenating "First Name" and
# "Last Name", separated by a single space (element-wise, one value per row)
sf['Full Name'] = sf['First Name'] + ' ' + sf['Last Name']

In [11]:
sf

Unnamed: 0,First Name,Last Name,Country,age,Full Name
0,Bob,Smith,United States,24,Bob Smith
1,Alice,Williams,Canada,23,Alice Williams
2,Malcolm,Jone,England,22,Malcolm Jone
3,Felix,Brown,USA,23,Felix Brown
4,Alex,Cooper,Poland,23,Alex Cooper
5,Tod,Campbell,United States,22,Tod Campbell
6,Derek,Ward,Switzerland,25,Derek Ward


In [12]:
# Multiply the age column by itself (element-wise); the result is only
# displayed, not stored back into the frame
sf['age'] * sf['age']

0    576
1    529
2    484
3    529
4    529
5    484
6    625
Name: age, dtype: int64

# Use Apply Function For Advanced Data Transformation

In [13]:
sf['Country']

0    United States
1           Canada
2          England
3              USA
4           Poland
5    United States
6      Switzerland
Name: Country, dtype: object

In [None]:
#sf['Country'].show()

In [15]:
def transform_country(country):
    """Normalize the 'USA' abbreviation to its long form.

    Args:
        country: The country name to normalize.

    Returns:
        'United States' when ``country`` equals 'USA'; otherwise
        ``country`` itself, unchanged.
    """
    # Only the exact value 'USA' is rewritten; every other value passes through.
    return 'United States' if country == 'USA' else country

In [16]:
transform_country('Brazil')

'Brazil'

In [17]:
transform_country('Brasil')

'Brasil'

In [18]:
transform_country('USA')

'United States'

In [19]:
# Take the "Country" column and apply the "transform_country" function to
# each value. The result is only displayed here; `sf` itself is not modified.
sf['Country'].apply(transform_country)

0    United States
1           Canada
2          England
3    United States
4           Poland
5    United States
6      Switzerland
Name: Country, dtype: object

In [20]:
# Overwrite the "Country" column with the result of applying
# "transform_country" to each value. This replaces the column in place,
# so `sf` no longer holds the original 'USA' entries after this cell runs.
sf['Country'] = sf['Country'].apply(transform_country)

In [21]:
sf

Unnamed: 0,First Name,Last Name,Country,age,Full Name
0,Bob,Smith,United States,24,Bob Smith
1,Alice,Williams,Canada,23,Alice Williams
2,Malcolm,Jone,England,22,Malcolm Jone
3,Felix,Brown,United States,23,Felix Brown
4,Alex,Cooper,Poland,23,Alex Cooper
5,Tod,Campbell,United States,22,Tod Campbell
6,Derek,Ward,Switzerland,25,Derek Ward
