In [2]:
import pandas as pd
import numpy as np

In [3]:
#numpy is fast for 2 main reasons:
# -all items in array are same data type
# -items in array are stored in contiguous memory locations

In [4]:
# numpy has many functions that are written for numpy arrays (optimized)

In [5]:
# lets create a numpy array and a python list to compare
item_cnt = 1000000 # 1 million items
numpy_array = np.arange(0, item_cnt)
python_list = list(range(item_cnt))

In [6]:
# using the numpy specific sum function on a numpy array
%timeit np.sum(numpy_array)

738 µs ± 82.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [7]:
# using the python sum function on a python list
%timeit sum(python_list)

7.25 ms ± 50.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# any more options?

In [8]:
# using the numpy sum function on a python list
%timeit np.sum(python_list)

49.8 ms ± 710 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
# using the python sum function on a numpy array
%timeit sum(numpy_array)

75.3 ms ± 932 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [11]:
# create a n-dimensional array
my_lst_ndarray = np.array([1, 2, 3, 4, 5])
my_tuple_ndarray = np.array((1, 2, 3, 4, 5), np.int32)

In [12]:
print(my_lst_ndarray.shape)
print(my_tuple_ndarray.shape)

(5,)
(5,)


In [13]:
print(my_lst_ndarray.dtype)
print(my_tuple_ndarray.dtype)

int64
int32


In [15]:
# remember all items in an nd array must be the same type
my_lst_ndarray2 = np.array(["1", 2, 66653, "10", 5])
print(my_lst_ndarray2.dtype)

<U5


In [16]:
my_lst_ndarray3 = np.array([1, 2, 3, "10", 5], np.int32) 
print(my_lst_ndarray3.dtype)

int32


In [17]:
my_lst_ndarray3 = np.array([1, 2, 3, "bozo", 5], np.int32) 

ValueError: invalid literal for int() with base 10: 'bozo'

In [18]:
zeros_arr = np.zeros((3,4)) # Create a matrix of zeros with 3 rows and 4 columns. 
ones_arr = np.ones((10,20))  # Create a matrix of ones with 10 rows and 20 columns.
identity_arr = np.identity(50) # Create an identity matrix with 50 rows and 50 columns. 
random_arr = np.random.rand(2, 2) # Create a 2x2 array of random floats ranging from 0 to 1. 
range_arr = np.arange(0, 20, 0.5) # Create a numpy array with arguments (start, end, step_size). 

In [25]:
zeros_arr
ones_arr
# print(identity_arr)
random_arr
range_arr

array([  0. ,   0.5,   1. ,   1.5,   2. ,   2.5,   3. ,   3.5,   4. ,
         4.5,   5. ,   5.5,   6. ,   6.5,   7. ,   7.5,   8. ,   8.5,
         9. ,   9.5,  10. ,  10.5,  11. ,  11.5,  12. ,  12.5,  13. ,
        13.5,  14. ,  14.5,  15. ,  15.5,  16. ,  16.5,  17. ,  17.5,
        18. ,  18.5,  19. ,  19.5])

In [26]:
np.eye(4,5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.]])

In [27]:
first_arr  = np.array([1, 2, 3, 4])
second_arr = np.array([5, 6, 7, 8])
first_arr + second_arr
# Each element is lined up with it's corresponding element in the other array, and the addition is then performed. 

array([ 6,  8, 10, 12])

In [28]:
first_arr = np.array([[1, 2], [3, 4]]) # This is now a two-dimensional array. 
second_arr = np.array([[5, 6], [7, 8]]) # This is now a two-dimensional array. 
first_arr * second_arr
# Each element is lined up with it's corresponding element in the other array, and the multiplication is then performed. 

array([[ 5, 12],
       [21, 32]])

In [29]:
rows = first_arr.shape[0]
rows

2

In [30]:
first_arr.shape

(2, 2)

In [31]:
first_arr = np.array([[1, 2], [3, 4]]) # This is now a two-dimensional array. 

In [32]:
first_arr - 4

array([[-3, -2],
       [-1,  0]])

In [33]:
first_arr * 5

array([[ 5, 10],
       [15, 20]])

In [34]:
first_arr % 3

array([[1, 2],
       [0, 1]])

In [35]:
first_arr = np.array([[1, 2], [3, 4]]) # This is now a two-dimensional array. 

In [36]:
first_arr - [4, 5]
# Here, we subtract 4 off the first **column** and 5 off the second **column**.

array([[-3, -3],
       [-1, -1]])

In [37]:
first_arr - [[4], [5]] # Here, we subtract 4 from the first **row** and 5 from the second **row**. 

array([[-3, -2],
       [-2, -1]])

In [38]:
# Reshape will reshape the data to the shape that you tell it to (here 5 rows, 4 columns). 
range_arr = np.arange(0, 20, 1)
print(range_arr)
range_arr = range_arr.reshape(5,4)
print(range_arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]]


In [39]:
range_arr[:, 2] # Grab every row, but only the element at index 2 in those rows. 

array([ 2,  6, 10, 14, 18])

In [40]:
range_arr[0:2] # With no second index, this defaults to taking the rows. 

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [41]:
# The first set of numbers refers to the rows to grab, the second set the columns.  
range_arr[0:2, 1:3]

array([[1, 2],
       [5, 6]])

In [42]:
range_arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [43]:
range_arr.sum(axis=1) # Sum each row (i.e. get row totals)

array([ 6, 22, 38, 54, 70])

In [44]:
range_arr.sum(axis=0) # Sum each column (i.e. get col totals)

array([40, 45, 50, 55])

In [45]:
range_arr.sum() # Get sum of all elements in numpy array. 

190

In [46]:
range_arr.mean(axis=0)

array([  8.,   9.,  10.,  11.])

In [47]:
range_arr.std(axis=0)

array([ 5.65685425,  5.65685425,  5.65685425,  5.65685425])

In [48]:
range_arr.max(axis=0)

array([16, 17, 18, 19])

In [49]:
range_arr[1,3]=155
# range_arr[0][2]=15
range_arr

array([[  0,   1,   2,   3],
       [  4,   5,   6, 155],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15],
       [ 16,  17,  18,  19]])

In [50]:
range_arr.argmin() # We see that the mins of each column occur at row 1 (index 0).

0

In [51]:
range_arr.argmax(axis=0) # We see that the maxes of each column occur at row 5 (index 4).

array([4, 4, 4, 1])

In [52]:
range_arr.argmin() # Here we get the index of the overall minimum (the 0th index).

0

In [53]:
range_arr

array([[  0,   1,   2,   3],
       [  4,   5,   6, 155],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15],
       [ 16,  17,  18,  19]])

In [54]:
range_arr.argmax() # Here we get the index of the overall maximum (the last index in second row). 

7

In [55]:
range_arr

array([[  0,   1,   2,   3],
       [  4,   5,   6, 155],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15],
       [ 16,  17,  18,  19]])

In [56]:
range_arr.cumsum(axis=0)  # Here it gets the cumsum along the rows (i.e. from top to bottom)

array([[  0,   1,   2,   3],
       [  4,   6,   8, 158],
       [ 12,  15,  18, 169],
       [ 24,  28,  32, 184],
       [ 40,  45,  50, 203]])

In [57]:
range_arr

array([[  0,   1,   2,   3],
       [  4,   5,   6, 155],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15],
       [ 16,  17,  18,  19]])

In [58]:
range_arr.cumprod(axis=0) # Gets the cumprod along the rows

array([[      0,       1,       2,       3],
       [      0,       5,      12,     465],
       [      0,      45,     120,    5115],
       [      0,     585,    1680,   76725],
       [      0,    9945,   30240, 1457775]])

In [59]:
range_arr

array([[  0,   1,   2,   3],
       [  4,   5,   6, 155],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15],
       [ 16,  17,  18,  19]])

In [61]:
# We can flatten our arrays as follows. 
range_arr.flatten()
range_arr.ravel()  # They look the same in this case. 

array([  0,   1,   2,   3,   4,   5,   6, 155,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19])

In [None]:
range_arr

In [62]:
my_ndarray = np.array([2, 4, 6, 8, 24, 3, 8, 9, 12])

In [64]:
my_ndarray

array([ 2,  4,  6,  8, 24,  3,  8,  9, 12])

In [63]:
print(np.where(my_ndarray <= 2)) # Returns the indices where the data meet the condition. 
print(np.where(my_ndarray == 8)) # Returns the indices where the data meet the condition. 
print(np.where(my_ndarray > 6)) # Returns the indices where the data meet the condition. 

(array([0]),)
(array([3, 6]),)
(array([3, 4, 6, 7, 8]),)


In [65]:
# boolean indexing
my_ndarray[my_ndarray > 6]

array([ 8, 24,  8,  9, 12])

In [66]:
my_ndarray > 6

array([False, False, False,  True,  True, False,  True,  True,  True], dtype=bool)

In [67]:
import pandas as pd
wine_df = pd.read_csv('winequality-red.csv', delimiter=';')
wine_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [68]:
wine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
fixed acidity           1599 non-null float64
volatile acidity        1599 non-null float64
citric acid             1599 non-null float64
residual sugar          1599 non-null float64
chlorides               1599 non-null float64
free sulfur dioxide     1599 non-null float64
total sulfur dioxide    1599 non-null float64
density                 1599 non-null float64
pH                      1599 non-null float64
sulphates               1599 non-null float64
alcohol                 1599 non-null float64
quality                 1599 non-null int64
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [69]:
# A glance at the values of the quality of wine in the DataFrame
wine_df.quality.unique()

array([5, 6, 7, 4, 8, 3])

In [70]:
# get_dummies is a method called on the pandas module - you simply pass in a Pandas Series 
# or DataFrame, and it will convert a categorical variable into dummy/indicator variables. 
quality_dummies = pd.get_dummies(wine_df.quality, prefix='quality')
quality_dummies.head()

Unnamed: 0,quality_3,quality_4,quality_5,quality_6,quality_7,quality_8
0,0,0,1,0,0,0
1,0,0,1,0,0,0
2,0,0,1,0,0,0
3,0,0,0,1,0,0
4,0,0,1,0,0,0


In [71]:
joined_df = wine_df.join(quality_dummies)
joined_df.head() 

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,quality_3,quality_4,quality_5,quality_6,quality_7,quality_8
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0,0,1,0,0,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,0,0,1,0,0,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,0,0,1,0,0,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,0,0,0,1,0,0
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0,0,1,0,0,0


In [72]:
# Let's now look at concat. 
joined_df2 = pd.concat([quality_dummies, wine_df], axis=1)
joined_df2.head()

Unnamed: 0,quality_3,quality_4,quality_5,quality_6,quality_7,quality_8,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0,0,1,0,0,0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,0,0,1,0,0,0,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,0,0,1,0,0,0,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,0,0,0,1,0,0,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,0,0,1,0,0,0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [73]:
red_wines_df = pd.read_csv('winequality-red.csv', delimiter=';')
white_wines_df = pd.read_csv('winequality-white.csv', delimiter=';')

In [74]:
red_wines_df.columns.tolist()

['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol',
 'quality']

In [75]:
white_wines_df.columns.tolist()

['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol',
 'quality']

In [76]:
red_wines_quality_df = red_wines_df.groupby('quality').mean()['fixed acidity'].reset_index()
red_wines_quality_df.head()

Unnamed: 0,quality,fixed acidity
0,3,8.36
1,4,7.779245
2,5,8.167254
3,6,8.347179
4,7,8.872362


In [77]:
white_wines_quality_df = white_wines_df.groupby('quality').mean()['fixed acidity'].reset_index()
white_wines_quality_df.head()

Unnamed: 0,quality,fixed acidity
0,3,7.6
1,4,7.129448
2,5,6.933974
3,6,6.837671
4,7,6.734716


In [78]:
df2= pd.merge(red_wines_df, white_wines_df, on=['quality'], suffixes=[' red', ' white'])

In [79]:
df2

Unnamed: 0,fixed acidity red,volatile acidity red,citric acid red,residual sugar red,chlorides red,free sulfur dioxide red,total sulfur dioxide red,density red,pH red,sulphates red,...,volatile acidity white,citric acid white,residual sugar white,chlorides white,free sulfur dioxide white,total sulfur dioxide white,density white,pH white,sulphates white,alcohol white
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.270,0.41,1.45,0.033,11.0,63.0,0.99080,2.99,0.56,12.0
1,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.230,0.40,4.20,0.035,17.0,109.0,0.99470,3.14,0.53,9.7
2,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.180,0.37,1.20,0.040,16.0,75.0,0.99200,3.18,0.63,10.8
3,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.420,0.62,19.25,0.040,41.0,172.0,1.00020,2.98,0.67,9.7
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.310,0.14,7.50,0.044,34.0,133.0,0.99550,3.22,0.50,9.5
5,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.670,0.14,1.50,0.074,25.0,168.0,0.99370,3.05,0.51,9.3
6,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.270,0.20,14.95,0.044,22.0,179.0,0.99620,3.37,0.37,10.2
7,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.280,0.43,1.70,0.080,21.0,123.0,0.99050,3.19,0.42,12.8
8,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.390,0.23,5.40,0.051,25.0,149.0,0.99340,3.24,0.35,10.0
9,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,...,0.240,0.39,17.95,0.057,45.0,149.0,0.99990,3.21,0.36,8.6


In [80]:
# Let's recall what the data looks like. 
red_wines_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [81]:
pd.cut(red_wines_df['fixed acidity'], bins=np.arange(4, 17)).head()

0      (7, 8]
1      (7, 8]
2      (7, 8]
3    (11, 12]
4      (7, 8]
Name: fixed acidity, dtype: category
Categories (12, interval[int64]): [(4, 5] < (5, 6] < (6, 7] < (7, 8] ... (12, 13] < (13, 14] < (14, 15] < (15, 16]]

In [82]:
fixed_acidity_bins = np.arange(4, 17)
fixed_acidity_series = pd.cut(red_wines_df['fixed acidity'], bins=fixed_acidity_bins, 
                              labels=fixed_acidity_bins[:-1])
fixed_acidity_series.name = 'fa_bin'
red_wines_df = pd.concat([red_wines_df, fixed_acidity_series], axis=1)

In [83]:
red_wines_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,fa_bin
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,7
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,7
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,7
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,11
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,7


In [84]:
pd.pivot_table(red_wines_df, values='residual sugar', index='quality', columns='fa_bin')

fa_bin,4,5,6,7,8,9,10,11,12,13,14,15
quality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3,,,1.5,3.5375,3.4,,1.8,2.2,,,,
4,1.75,5.3,2.714286,2.453846,2.583333,2.433333,2.566667,1.5,4.5,,,
5,1.6,1.85,2.492623,2.441331,2.496786,2.675,3.238889,2.77,2.393333,3.133333,,5.025
6,2.35,2.886538,2.556767,2.167027,2.281731,2.801562,2.910345,2.524359,2.9125,2.85,1.8,
7,2.1,1.9,2.595,2.655,2.796429,2.8625,2.718,2.638889,4.15,2.8,2.2,3.7
8,2.0,1.6,,2.316667,1.8,2.166667,3.866667,5.2,2.2,,,


In [85]:
# We can also specify a function to aggregate with
pd.pivot_table(red_wines_df, values='residual sugar', index='quality', columns='fa_bin', aggfunc=np.max)

fa_bin,4,5,6,7,8,9,10,11,12,13,14,15
quality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3,,,1.8,5.7,3.4,,2.1,2.2,,,,
4,2.1,12.9,5.6,4.4,6.3,3.4,3.4,1.6,4.5,,,
5,1.6,2.5,7.9,8.1,7.9,13.8,15.5,5.15,4.6,4.8,,7.5
6,4.3,13.9,10.7,5.5,5.1,11.0,15.4,6.2,4.3,3.8,1.8,
7,2.1,2.2,6.0,8.3,6.2,8.9,6.55,4.4,5.8,2.8,2.2,3.7
8,2.0,1.8,,3.6,1.8,2.8,6.4,5.2,2.2,,,


In [None]:
df.tail(10)

In [None]:
# Further filter the results from 6 to grab only those rows that have a residual sugar less than 2.0. 

In [None]:
# Get the average amount of chlorides for each quality value. 

In [None]:
reverse_words

In [None]:
"This is my first sentence."

In [None]:
"sihT si ym tsrif .ecnetnes"

In [89]:
a = "agdgdfgdgdgdgdgd sd ds sd fsfc"
o=""
length = len(a)
x=length-1

In [90]:
while x>-1:
    o+=a[x]
    #get closer to exiting loop
    x-=1

In [91]:
o

'cfsf ds sd ds dgdgdgdgdgfdgdga'

In [109]:
def reverse_words(sent):
    low = sent.split(' ')
    
    out_lst=[]
    for word in low:
#         x = len(word)-1
#         o=""
#         while x>-1:
# #             print(word[x])
#             o+=word[x]
#             x-=1
#             print(out_lst)
#         print(o)
        out_lst.append(word[::-1])
#     print(out_lst)
    return " ".join(out_lst)

In [114]:
def reverse_words(sent):
    return " ".join([word[::-1] for word in sent.split()])

In [115]:
reverse_words("abc 123     hello")

'cba 321 olleh'

In [116]:
def digital_root(num):
    
#     loop until we have a single digit number (<10)
    while num>=10:
        tmp_sum=0
        num_str=str(num)
        for n in num_str:
            tmp_sum+=int(n)
        num = tmp_sum
    return num
        #reduce number

In [125]:
digital_root(9991)

1

In [None]:
https://www.codewars.com/kata/sum-of-digits-slash-digital-root