In [9]:
import pandas as pd

# Basic car-sales table: one column per brand, one row per sales location.
# The six locations are labelled "One" .. "Six" through the DataFrame index;
# passing the labels to the constructor avoids a separate in-place rename.
Cardata = { "Mercedes": [2, 4, 0, 4, 0, 3], "Ford": [3, 0, 0, 1, 6, 12], "Tata":[9, 3, 4, 1, 0, 0], "Renault":[12, 1, 0, 0, 3, 1]}
Carsales = pd.DataFrame(Cardata, index=["One", "Two", "Three", "Four", "Five", "Six"])
print(Carsales)

       Mercedes  Ford  Tata  Renault
One           2     3     9       12
Two           4     0     3        1
Three         0     0     4        0
Four          4     1     1        0
Five          0     6     0        3
Six           3    12     0        1


In [10]:
# Sum, maximum and minimum of every column, one result row per function.
overall_stats = Carsales.agg(['sum', 'max', 'min'])
print(overall_stats)

     Mercedes  Ford  Tata  Renault
sum        13    22    17       17
max         4    12     9       12
min         0     0     0        0


In [13]:
# Named aggregation on a single column: each keyword becomes a row label
# and is given as a (column, function) pair.
ford_stats = Carsales.agg(
    Sum=('Ford', 'sum'),
    Maximum=('Ford', 'max'),
    Minimum=('Ford', 'min'),
)
print(ford_stats)

         Ford
Sum        22
Maximum    12
Minimum     0


In [18]:
# Different statistics per column via a {column: [functions]} mapping.
# 'count' is requested for Tata only, so the Ford entry in that row is NaN
# (which is also why the Ford column is printed as floats).
per_column_funcs = {
    'Ford': ['sum', 'max', 'min'],
    'Tata': ['sum', 'max', 'min', 'count'],
}
print(Carsales.agg(per_column_funcs))

       Ford  Tata
sum    22.0    17
max    12.0     9
min     0.0     0
count   NaN     6


In [19]:
# Column totals for a chosen subset of brands.
selected_brands = Carsales[['Ford', 'Mercedes', 'Tata']]
print(selected_brands.agg('sum'))

Ford        22
Mercedes    13
Tata        17
dtype: int64


In [21]:
# Aggregating along axis='columns' gives one value per row:
# here the mean number of cars sold per brand at each location.
row_means = Carsales.agg('mean', axis='columns')
print(row_means)

One      6.50
Two      2.00
Three    1.00
Four     1.50
Five     2.25
Six      4.00
dtype: float64


In [22]:
# describe() summarises each column with count, mean, std, min,
# the 25%/50%/75% quantiles and max.
summary = Carsales.describe()
print(summary)

       Mercedes       Ford      Tata    Renault
count  6.000000   6.000000  6.000000   6.000000
mean   2.166667   3.666667  2.833333   2.833333
std    1.834848   4.676181  3.430258   4.622409
min    0.000000   0.000000  0.000000   0.000000
25%    0.500000   0.250000  0.250000   0.250000
50%    2.500000   2.000000  2.000000   1.000000
75%    3.750000   5.250000  3.750000   2.500000
max    4.000000  12.000000  9.000000  12.000000


In [24]:
# value_counts() reports how many rows hold each distinct value.
# Example: the value 0 appears in two rows of the Ford column.
ford_counts = Carsales['Ford'].value_counts()
print(ford_counts)

Ford
0     2
3     1
1     1
6     1
12    1
Name: count, dtype: int64


In [25]:
# With normalize=True the counts are returned as proportions of all rows,
# i.e. the relative frequency of each distinct value.
ford_share = Carsales['Ford'].value_counts(normalize=True)
print(ford_share)

Ford
0     0.333333
3     0.166667
1     0.166667
6     0.166667
12    0.166667
Name: proportion, dtype: float64


In [28]:
# Row-wise total over the brand columns only. The brands are selected by name
# (the keys of Cardata) so that this cell can be re-run safely: summing over
# Carsales.columns would also include 'Total Sales' itself on a second run,
# and the text-valued 'Rating' column once it has been added further down.
brand_columns = list(Cardata)
Carsales['Total Sales'] = Carsales[brand_columns].sum(axis=1)
print(Carsales)

       Mercedes  Ford  Tata  Renault  Total Sales
One           2     3     9       12           26
Two           4     0     3        1            8
Three         0     0     4        0            4
Four          4     1     1        0            6
Five          0     6     0        3            9
Six           3    12     0        1           16


In [31]:
# Series.apply calls a function on every value of a column (here: each row's 'Total Sales')
def sales_rating(n_sold: float, excellent_min: float = 15, acceptable_min: float = 5) -> str:
    """Classify a sales count as 'Excellent', 'Acceptable' or 'Unacceptable'.

    Args:
        n_sold: Number of units sold.
        excellent_min: Smallest count that is rated 'Excellent' (default 15).
        acceptable_min: Smallest count that is rated 'Acceptable' (default 5).

    Returns:
        'Excellent' when n_sold >= excellent_min, 'Acceptable' when
        n_sold >= acceptable_min, and 'Unacceptable' otherwise.
    """
    # Thresholds are checked from highest to lowest, so the first match wins.
    if n_sold >= excellent_min:
        return 'Excellent'
    if n_sold >= acceptable_min:
        return 'Acceptable'
    return 'Unacceptable'

# Rate each row by running sales_rating on its 'Total Sales' value,
# then store the resulting labels in a new 'Rating' column.
row_ratings = Carsales['Total Sales'].apply(sales_rating)
Carsales['Rating'] = row_ratings
print(Carsales)

       Mercedes  Ford  Tata  Renault  Total Sales        Rating
One           2     3     9       12           26     Excellent
Two           4     0     3        1            8    Acceptable
Three         0     0     4        0            4  Unacceptable
Four          4     1     1        0            6    Acceptable
Five          0     6     0        3            9    Acceptable
Six           3    12     0        1           16     Excellent
