# How to master Python’s main data analysis library at scale with SAS in 20 minutes (integration between pandas and SAS Cloud Analytic Services)

### How to use pandas syntax transparently and delegate scalable, distributed in-memory processing to SAS Cloud Analytic Services

In [2]:
import os

In [3]:
import pandas as pd
import swat
import sys
from matplotlib import pyplot as plt
%matplotlib inline

## 1) Establish a secure connection to the SAS Cloud Analytic Services in-memory processing engine

In [4]:
# Location of the trusted-certificates bundle the CAS client should use.
# It is exported through the environment here, ahead of the swat.CAS(...) call.
CAS_TRUSTED_CERTS = '/opt/sas/viya/config/etc/SASSecurityCertificateFramework/cacerts/trustedcerts.pem'
os.environ['CAS_CLIENT_SSL_CA_LIST'] = CAS_TRUSTED_CERTS

In [None]:
# Host name and port of the CAS server this notebook connects to.
cashost, casport = 'frasepviya35smp', 5570

# Open the CAS session; every later cell talks to the server through `sess`.
sess = swat.CAS(cashost, casport)

In [3]:
# Ask the server for its status; the saved output reports the node count,
# the number of actions executed, and each node's role and uptime.
sess.serverstatus()

NOTE: Grid node action status report: 1 nodes, 8 total actions executed.


Unnamed: 0,nodes,actions
0,1,8

Unnamed: 0,name,role,uptime,running,stalled
0,sepviya35.aws.sas.com,controller,2.441,0,0


In [4]:
# Upload the happiness CSV into an in-memory CAS table named 'mytab' and keep
# a handle to it. replace=True makes the cell safe to re-run within the same
# session: with a bare table name the upload is rejected once MYTAB already
# exists, whereas the merge section further down explicitly drops its tables
# before re-uploading.
mytab = sess.upload_file(
    'https://raw.githubusercontent.com/FBosler/you-datascientist/master/happiness_with_continent.csv',
    casout=dict(name='mytab', replace=True),
)

NOTE: Cloud Analytic Services made the uploaded file available as table MYTAB in caslib CASUSER(viyademo01).
NOTE: The table MYTAB has been created in caslib CASUSER(viyademo01) from binary data uploaded to Cloud Analytic Services.


## 2) Inspecting / Sorting / Filtering data

In [9]:
# Peek at the first five rows of the uploaded table.
mytab.head(5)

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
0,Afghanistan,2008.0,3.72359,7.16869,0.450662,50.799999,0.718114,0.177889,0.881686,0.517637,...,,,,,,,,,,Asia
1,Afghanistan,2009.0,4.401778,7.33379,0.552308,51.200001,0.678896,0.200178,0.850035,0.583926,...,,0.441906,0.286315,,,,,,,Asia
2,Afghanistan,2010.0,4.758381,7.386629,0.539075,51.599998,0.600127,0.134353,0.706766,0.618265,...,,0.327318,0.275833,,,,,,,Asia
3,Afghanistan,2011.0,3.831719,7.415019,0.521104,51.919998,0.495901,0.172137,0.731109,0.611387,...,,0.336764,,,,,,,,Asia
4,Afghanistan,2012.0,3.782938,7.517126,0.520637,52.240002,0.530935,0.244273,0.77562,0.710385,...,,0.34454,,,,,,,,Asia


In [10]:
# Peek at the last five rows of the uploaded table.
mytab.tail(5)

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
1699,Zimbabwe,2007.0,3.280247,7.313939,0.828113,42.860001,0.455957,-0.061563,0.946287,0.660861,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
1700,Zimbabwe,2008.0,3.174264,7.102516,0.843475,44.139999,0.343556,-0.068002,0.963846,0.630983,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
1701,Zimbabwe,2009.0,4.055914,7.197595,0.805781,45.419998,0.411089,-0.055836,0.930818,0.735503,...,0.432,0.545112,0.148151,,,,0.116683,,0.0829422697424888,Africa
1702,Zimbabwe,2010.0,4.68157,7.29633,0.856638,46.700001,0.664718,-0.066457,0.828361,0.747702,...,0.432,0.68003,,,,,0.116683,,0.0829422697424888,Africa
1703,Zimbabwe,2011.0,4.845642,7.418864,0.864694,48.119999,0.632978,-0.062267,0.8298,0.781189,...,0.432,0.514646,,,,,0.116683,,0.0829422697424888,Africa


In [7]:
# Draw a sample from mytab (argument 100). The result is another CASTable
# handle, not a local DataFrame. No seed is passed, so the selected rows
# presumably differ between runs — TODO confirm against the SWAT docs.
mytab_sample = mytab.sample(100)

In [8]:
# Fetch up to 100 rows of the sample for display.
mytab_sample.head(100)

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
0,Afghanistan,2015.0,3.982855,7.500539,0.528597,53.200001,0.388928,0.089091,0.880638,0.553553,...,,0.596918,,,,,,,,Asia
1,Albania,2017.0,4.639548,9.376145,0.637698,68.400002,0.749611,-0.032643,0.876135,0.669241,...,0.303250,0.410488,,,,0.243243,0.232000,,,Europe
2,Albania,2018.0,5.004403,9.412399,0.683592,68.699997,0.824212,0.005385,0.899129,0.713300,...,0.303250,0.456174,,,,0.243243,0.232000,,,Europe
3,Algeria,2014.0,6.354898,9.509210,0.818189,65.139999,,,,0.625905,...,0.276000,0.475492,,,,,0.107644,,0.17928633093833926,Africa
4,Australia,2014.0,7.288550,10.681601,0.923799,72.599998,0.922932,0.310035,0.442021,0.775210,...,0.342750,0.451517,,0.4781493842601776,,0.394492,,0.4613790214061737,0.5181400775909424,Oceania
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Uruguay,2013.0,6.444465,9.866335,0.917280,68.419998,0.888278,-0.052931,0.585632,0.826393,...,0.427364,0.456668,,,,0.216405,,0.2843930721282959,0.14435146749019626,South America
96,Uruguay,2015.0,6.628080,9.895024,0.891493,68.699997,0.916880,-0.042142,0.673476,0.892661,...,0.427364,0.408701,,,,0.216405,,0.2843930721282959,0.14435146749019626,South America
97,Uzbekistan,2017.0,6.421448,8.740833,0.942131,64.800003,0.985178,0.116511,0.464642,0.838989,...,0.348000,0.445986,,,,,,,0.13922356069087982,Asia
98,Vietnam,2006.0,5.293660,8.191376,0.887664,65.860001,0.885792,0.017538,,0.682261,...,0.362750,,,,,,0.387000,0.5100536346435547,,Asia


In [9]:
# Show the object's repr: the sample is a CASTable with a generated name.
mytab_sample

CASTable('_PY_T_BF611C35_5AB8_4EAB_BC74_0F74B61FCB63', caslib='CASUSER(viyademo01)')

In [10]:
# (rows, columns) of the table.
mytab.shape

(1704, 27)

In [11]:
# Column labels of the table, returned as an Index.
mytab.columns

Index(['Country name', 'Year', 'Life Ladder', 'Log GDP per capita',
       'Social support', 'Healthy life expectancy at birth',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption', 'Positive affect', 'Negative affect',
       'Confidence in national government', 'Democratic Quality',
       'Delivery Quality', 'Standard deviation of ladder by country-year',
       'Standard deviation/Mean of ladder by country-year',
       'GINI index (World Bank estimate)',
       'GINI index (World Bank estimate), average 2000-16',
       'gini of household income reported in Gallup, by wp5-year',
       'Most people can be trusted, Gallup',
       'Most people can be trusted, WVS round 1981-1984',
       'Most people can be trusted, WVS round 1989-1993',
       'Most people can be trusted, WVS round 1994-1998',
       'Most people can be trusted, WVS round 1999-2004',
       'Most people can be trusted, WVS round 2005-2009',
       'Most people can be trusted, WV

In [12]:
# Row index of the table (no output was saved for this cell).
mytab.index

In [5]:
# Per-column overview: non-missing count (N), whether values are missing (Miss)
# and the column type.
mytab.info()

CASTable('MYTAB', caslib='CASUSER(viyademo01)')
Data columns (total 27 columns):
                                                       N   Miss     Type
Country name                                        1704  False  varchar
Year                                                1704  False   double
Life Ladder                                         1704  False   double
Log GDP per capita                                  1676   True   double
Social support                                      1691   True   double
Healthy life expectancy at birth                    1676   True   double
Freedom to make life choices                        1675   True   double
Generosity                                          1622   True   double
Perceptions of corruption                           1608   True   double
Positive affect                                     1685   True   double
Negative affect                                     1691   True   double
Confidence in national government          

In [14]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
mytab.describe()

Unnamed: 0,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,...,Democratic Quality,Delivery Quality,Standard deviation of ladder by country-year,Standard deviation/Mean of ladder by country-year,GINI index (World Bank estimate),"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004"
count,1704.0,1704.0,1676.0,1691.0,1676.0,1675.0,1622.0,1608.0,1685.0,1691.0,...,1558.0,1559.0,1704.0,1704.0,643.0,1502.0,1335.0,180.0,618.0,491.0
mean,2012.33216,5.437155,9.222456,0.81057,63.111971,0.733829,7.9e-05,0.751315,0.709368,0.265679,...,-0.136053,-0.00139,2.026707,0.392121,0.37,0.385438,0.447771,0.226295,0.249574,0.26807
std,3.688072,1.121149,1.185794,0.11921,7.583622,0.144115,0.163365,0.186074,0.107984,0.084707,...,0.876074,0.975849,0.401484,0.124661,0.083232,0.082396,0.108505,0.119079,0.118126,0.14512
min,2005.0,2.661718,6.457201,0.290184,32.299999,0.257534,-0.336385,0.035198,0.362498,0.083426,...,-2.448228,-2.144974,0.863034,0.133908,0.24,0.211,0.200969,0.066618,0.04872,0.075872
25%,2009.0,4.610333,8.303352,0.747411,58.299999,0.638375,-0.1156,0.695672,0.621855,0.205413,...,-0.792426,-0.71158,1.743222,0.310129,0.305,0.321429,0.368422,0.139727,0.176876,0.155833
50%,2012.0,5.339557,9.406206,0.833098,65.0,0.752731,-0.02208,0.805775,0.718541,0.254544,...,-0.227386,-0.218633,1.97307,0.372744,0.352,0.371,0.426541,0.19845,0.229924,0.232
75%,2015.0,6.274104,10.193813,0.904474,68.300003,0.848194,0.093729,0.876533,0.80153,0.314923,...,0.651249,0.700624,2.242557,0.456327,0.428,0.4322,0.51496,0.28319,0.294242,0.385469
max,2018.0,8.018934,11.770276,0.987343,76.800003,0.985178,0.677743,0.983276,0.943621,0.70459,...,1.575009,2.184725,3.718958,1.022769,0.634,0.626,0.961435,0.640332,0.647737,0.637185


In [15]:
# Sort by Year, oldest first. The result is bound to a new name, so mytab
# itself is left as it was.
mytab_sort1= mytab.sort_values(by="Year", ascending=True)

In [16]:
# Show the object's repr: a CASTable with the sort_values('Year') step attached.
# NOTE(review): the saved output for this cell names table 'HMEQ' rather than
# 'MYTAB' — it looks stale from an earlier run; re-execute the cell to refresh it.
mytab_sort1

CASTable('HMEQ', caslib='CASUSER(viyademo01)').sort_values('Year')

In [17]:
# First ten rows in ascending Year order.
mytab_sort1.head(10)

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
0,Australia,2005.0,7.340688,10.56947,0.967892,71.400002,0.934973,,0.390416,0.842648,...,0.34275,,,0.4781493842601776,,0.394492,,0.4613790214061737,0.5181400775909424,Oceania
1,Czech Republic,2005.0,6.439257,10.157401,0.918759,67.0,0.865235,,0.900733,0.722875,...,0.264167,,,,0.3022751808166504,0.285192,,,,Europe
2,Canada,2005.0,7.418048,10.608347,0.961552,71.300003,0.957306,0.244575,0.502681,0.838544,...,0.3368,,,,,,0.385469,0.4185004532337189,,North America
3,Brazil,2005.0,6.636771,9.41724,0.882923,63.299999,0.882186,,0.744994,0.818337,...,0.547286,,,,0.0660198628902435,,,0.0938110947608947,0.0706378445029258,South America
4,Greece,2005.0,6.00631,10.294153,0.836539,70.5,0.734172,,0.860563,0.691998,...,0.346385,,,,,,,,,Europe
5,Germany,2005.0,6.61955,10.537519,0.96349,70.199997,0.846624,,0.781007,0.775692,...,0.306222,,,,,0.321323,,0.3435913622379303,0.4474876523017883,Europe
6,France,2005.0,7.093393,10.505228,0.940338,71.300003,0.894819,,0.687851,0.768988,...,0.320923,,,,,,,0.1876423209905624,,Europe
7,Egypt,2005.0,5.167754,8.987387,0.847842,59.700001,0.817362,,,0.734863,...,0.312,,,,,,0.374667,0.1850128024816513,0.2150088995695114,Africa
8,Denmark,2005.0,8.018934,10.70477,0.972372,69.599998,0.971135,,0.236522,0.859549,...,0.267,,,,,,,,,Europe
9,Belgium,2005.0,7.26229,10.591697,0.934875,69.900002,0.923843,,0.597554,0.796279,...,0.284308,,,,,,,,,Europe


In [18]:
# Two-key sort: country names descending (Z to A), then years ascending within
# each country. Each entry of `ascending` pairs with the key at the same position.
mytab_sort2= mytab.sort_values(
    by=['Country name','Year'], ascending=[False,True])

In [19]:
# First ten rows of the two-key sort.
mytab_sort2.head(10)

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
0,Zimbabwe,2006.0,3.826268,7.366704,0.821656,41.580002,0.43111,-0.056554,0.904757,0.715229,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
1,Zimbabwe,2007.0,3.280247,7.313939,0.828113,42.860001,0.455957,-0.061563,0.946287,0.660861,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
2,Zimbabwe,2008.0,3.174264,7.102516,0.843475,44.139999,0.343556,-0.068002,0.963846,0.630983,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
3,Zimbabwe,2009.0,4.055914,7.197595,0.805781,45.419998,0.411089,-0.055836,0.930818,0.735503,...,0.432,0.545112,0.148151,,,,0.116683,,0.0829422697424888,Africa
4,Zimbabwe,2010.0,4.68157,7.29633,0.856638,46.700001,0.664718,-0.066457,0.828361,0.747702,...,0.432,0.68003,,,,,0.116683,,0.0829422697424888,Africa
5,Zimbabwe,2011.0,4.845642,7.418864,0.864694,48.119999,0.632978,-0.062267,0.8298,0.781189,...,0.432,0.514646,,,,,0.116683,,0.0829422697424888,Africa
6,Zimbabwe,2012.0,4.955101,7.534424,0.896476,49.540001,0.469531,-0.075712,0.858691,0.669279,...,0.432,0.487203,,,,,0.116683,,0.0829422697424888,Africa
7,Zimbabwe,2013.0,4.690188,7.565154,0.799274,50.959999,0.575884,-0.079458,0.830937,0.711885,...,0.432,0.555439,,,,,0.116683,,0.0829422697424888,Africa
8,Zimbabwe,2014.0,4.184451,7.562753,0.765839,52.380001,0.642034,-0.048634,0.820217,0.725214,...,0.432,0.60108,,,,,0.116683,,0.0829422697424888,Africa
9,Zimbabwe,2015.0,3.703191,7.556052,0.7358,53.799999,0.667193,-0.097354,0.810457,0.715079,...,0.432,0.655137,,,,,0.116683,,0.0829422697424888,Africa


In [20]:
# Label-based selection: every row, only the 'Country name' column.
mytab_sort2.loc[:, 'Country name'].head()

0    Zimbabwe
1    Zimbabwe
2    Zimbabwe
3    Zimbabwe
4    Zimbabwe
Name: Country name, dtype: object

In [21]:
# Position-based selection: every row, the column at position 1 ('Year').
mytab_sort2.iloc[:, 1].head()

0    2006.0
1    2007.0
2    2008.0
3    2009.0
4    2010.0
Name: Year, dtype: float64

In [22]:
# Position-based selection of several columns; the output follows the order
# given in the list (1, 5, 3), not the table's own column order.
mytab_sort2.iloc[:, [1,5,3]].head()

Unnamed: 0,Year,Healthy life expectancy at birth,Log GDP per capita
0,2006.0,41.580002,7.366704
1,2007.0,42.860001,7.313939
2,2008.0,44.139999,7.102516
3,2009.0,45.419998,7.197595
4,2010.0,46.700001,7.29633


In [23]:
# Same two-key sort as mytab_sort2, but applied in place: the result is not
# bound to a new name, and mytab itself carries the ordering from here on.
# Every later cell that reads mytab therefore sees it sorted (Zimbabwe first).
mytab.sort_values(
    by=['Country name','Year'], ascending=[False,True], inplace=True)

In [24]:
# mytab now comes back in the sorted order set by the in-place sort.
mytab.head()

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
0,Zimbabwe,2006.0,3.826268,7.366704,0.821656,41.580002,0.43111,-0.056554,0.904757,0.715229,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
1,Zimbabwe,2007.0,3.280247,7.313939,0.828113,42.860001,0.455957,-0.061563,0.946287,0.660861,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
2,Zimbabwe,2008.0,3.174264,7.102516,0.843475,44.139999,0.343556,-0.068002,0.963846,0.630983,...,0.432,,,,,,0.116683,,0.0829422697424888,Africa
3,Zimbabwe,2009.0,4.055914,7.197595,0.805781,45.419998,0.411089,-0.055836,0.930818,0.735503,...,0.432,0.545112,0.148151,,,,0.116683,,0.0829422697424888,Africa
4,Zimbabwe,2010.0,4.68157,7.29633,0.856638,46.700001,0.664718,-0.066457,0.828361,0.747702,...,0.432,0.68003,,,,,0.116683,,0.0829422697424888,Africa


In [34]:
# Select a single column by name and show its first five values.
mytab['Year'].head(5)

0    2006.0
1    2007.0
2    2008.0
3    2009.0
4    2010.0
Name: Year, dtype: float64

In [35]:
# Select two columns by name, sample from them, and display the sampled rows.
mytab[['Country name','Life Ladder']].sample(5).head(5)

Unnamed: 0,Country name,Life Ladder
0,Argentina,6.073158
1,Bolivia,5.834329
2,China,5.037208
3,Czech Republic,6.249618
4,El Salvador,5.70093


In [36]:
# Convert the CAS table into a client-side frame object.
# NOTE(review): presumably this downloads every row to the client — confirm
# against the SWAT docs before using it on large tables. mytab_pd is not used
# again in the cells that follow.
mytab_pd = mytab.to_frame()

In [37]:
# Boolean-mask filtering: keep only the rows whose country is 'Macedonia'.
mytab[mytab['Country name'] == 'Macedonia'].head()

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
0,Macedonia,2007.0,4.493598,9.257283,0.810538,66.139999,0.4394,0.080502,0.869546,0.602946,...,0.390167,,,,,0.075377,0.130688,,,Europe
1,Macedonia,2009.0,4.428022,9.305205,0.734431,66.379997,0.552174,-0.041417,0.843916,0.575552,...,0.390167,0.401139,0.121652,,,0.075377,0.130688,,,Europe
2,Macedonia,2010.0,4.180202,9.337446,0.686855,66.5,0.513184,-0.057998,0.856453,0.566944,...,0.390167,0.387459,0.091314,,,0.075377,0.130688,,,Europe
3,Macedonia,2011.0,4.89818,9.359781,0.7843,66.580002,0.607463,-0.086839,0.865062,0.588337,...,0.390167,0.492776,0.116461,,,0.075377,0.130688,,,Europe
4,Macedonia,2012.0,4.639647,9.354412,0.798305,66.660004,0.613056,-0.083973,0.919845,0.641887,...,0.390167,0.375651,,,,0.075377,0.130688,,,Europe


In [38]:
# Combine two conditions with & (each wrapped in parentheses). The first five
# rows match the previous cell because all of them already have Life Ladder > 4.
mytab[(mytab['Country name'] == 'Macedonia') & (mytab['Life Ladder']>4)].head()

Unnamed: 0,Country name,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,"GINI index (World Bank estimate), average 2000-16","gini of household income reported in Gallup, by wp5-year","Most people can be trusted, Gallup","Most people can be trusted, WVS round 1981-1984","Most people can be trusted, WVS round 1989-1993","Most people can be trusted, WVS round 1994-1998","Most people can be trusted, WVS round 1999-2004","Most people can be trusted, WVS round 2005-2009","Most people can be trusted, WVS round 2010-2014",Continent
0,Macedonia,2007.0,4.493598,9.257283,0.810538,66.139999,0.4394,0.080502,0.869546,0.602946,...,0.390167,,,,,0.075377,0.130688,,,Europe
1,Macedonia,2009.0,4.428022,9.305205,0.734431,66.379997,0.552174,-0.041417,0.843916,0.575552,...,0.390167,0.401139,0.121652,,,0.075377,0.130688,,,Europe
2,Macedonia,2010.0,4.180202,9.337446,0.686855,66.5,0.513184,-0.057998,0.856453,0.566944,...,0.390167,0.387459,0.091314,,,0.075377,0.130688,,,Europe
3,Macedonia,2011.0,4.89818,9.359781,0.7843,66.580002,0.607463,-0.086839,0.865062,0.588337,...,0.390167,0.492776,0.116461,,,0.075377,0.130688,,,Europe
4,Macedonia,2012.0,4.639647,9.354412,0.798305,66.660004,0.613056,-0.083973,0.919845,0.641887,...,0.390167,0.375651,,,,0.075377,0.130688,,,Europe


## 3) Merging several tables

In [39]:
# Base URL of the public CSV files used in the merge examples.
MERGE_DATA_URL = 'https://raw.githubusercontent.com/shanealynn/Pandas-Merge-Tutorial/master'


def upload_fresh(conn, name, base_url=MERGE_DATA_URL):
    """Upload ``<base_url>/<name>.csv`` into a CAS table called ``name``.

    If a table with that name already exists it is dropped first, so the
    cell can be re-run without the upload colliding with an earlier copy.

    Parameters
    ----------
    conn : CAS connection object
        Connection used to reference, drop and upload the table.
    name : str
        Table name; also the CSV file's base name under ``base_url``.
    base_url : str, optional
        Location the CSV file is fetched from.

    Returns
    -------
    The table object returned by ``conn.upload_file``.
    """
    target = conn.CASTable(name)
    if target.tableexists().exists:
        target.droptable()
    return conn.upload_file('{}/{}.csv'.format(base_url, name), casout=target)


# One call per table replaces three copies of the same check/drop/upload code.
user_usage = upload_fresh(sess, 'user_usage')
user_device = upload_fresh(sess, 'user_device')
android_devices = upload_fresh(sess, 'android_devices')

NOTE: Cloud Analytic Services made the uploaded file available as table USER_USAGE in caslib CASUSER(viyademo01).
NOTE: The table USER_USAGE has been created in caslib CASUSER(viyademo01) from binary data uploaded to Cloud Analytic Services.
NOTE: Cloud Analytic Services made the uploaded file available as table USER_DEVICE in caslib CASUSER(viyademo01).
NOTE: The table USER_DEVICE has been created in caslib CASUSER(viyademo01) from binary data uploaded to Cloud Analytic Services.
NOTE: Cloud Analytic Services made the uploaded file available as table ANDROID_DEVICES in caslib CASUSER(viyademo01).
NOTE: The table ANDROID_DEVICES has been created in caslib CASUSER(viyademo01) from binary data uploaded to Cloud Analytic Services.


In [41]:
%%time
result = swat.functions.merge(user_usage,user_device[['use_id', 'platform', 'device']], on='use_id')

CPU times: user 120 ms, sys: 2.53 ms, total: 123 ms
Wall time: 157 ms


In [42]:
# Size of the left-hand input to the merge.
user_usage.shape

(240, 4)

In [43]:
# Size of the right-hand input (the full table, before the column subset).
user_device.shape

(272, 6)

In [44]:
# Size of the merged table. It has fewer rows than either input, which is
# consistent with only the matching 'use_id' values being kept.
result.shape

(159, 6)

In [45]:
%%time
result = swat.functions.merge(user_usage,user_device[['use_id', 'platform', 'device']], on='use_id', how='left')

CPU times: user 126 ms, sys: 2.47 ms, total: 128 ms
Wall time: 172 ms


In [25]:
# Close the connection to the CAS server now that the walkthrough is finished.
sess.close()