# Team Pandas Series

In [187]:
# imports

import pandas as pd
import numpy as np

---

### Kombat 1: How to create a series from a list, numpy array and dict?

Create a pandas series from each of the items below: a list, numpy and a dictionary

In [188]:
# Input

mylist = list('abcde')
myarr = np.arange(5)
mydict = dict(zip(mylist, myarr))

In [189]:
# Fight!!!

mylistserie = pd.Series(mylist)
myarrserie = pd.Series(myarr)
mydictserie = pd.Series(mydict)

In [190]:
# Output

print(mylistserie)
print(myarrserie)
print(mydictserie)

0    a
1    b
2    c
3    d
4    e
dtype: object
0    0
1    1
2    2
3    3
4    4
dtype: int64
a    0
b    1
c    2
d    3
e    4
dtype: int64


---

### Kombat 2: How to get the items of series A not present in series B?

From ser1 remove items present in ser2.

In [191]:
# Input

ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [192]:
# Fight

result = ser1[~ser1.isin(ser2)]
result

0    1
1    2
2    3
dtype: int64

In [193]:
# Output

ser1

0    1
1    2
2    3
3    4
4    5
dtype: int64

---

### Kombat 3: How to bin a numeric series to 10 groups of equal size?

Bin the series ser into 10 equal deciles and replace the values with the bin name.

In [194]:
# Input

ser = pd.Series(np.random.random(20))

In [195]:
# Fight

bins = pd.qcut(ser, q=10, labels=False)
deciles = [f'Decile {i+1}' for i in range(10)]
result = pd.Series(deciles)[bins]

In [196]:
# Output

print(result)

7     Decile 8
8     Decile 9
6     Decile 7
9    Decile 10
0     Decile 1
2     Decile 3
7     Decile 8
4     Decile 5
3     Decile 4
4     Decile 5
3     Decile 4
1     Decile 2
9    Decile 10
8     Decile 9
2     Decile 3
6     Decile 7
0     Decile 1
5     Decile 6
5     Decile 6
1     Decile 2
dtype: object


---

### Kombat 4: How to convert a numpy array to a dataframe of given shape?

Reshape the series ser into a dataframe with 7 rows and 5 columns.

In [197]:
# Input

ser = pd.Series(np.random.randint(1, 10, 35))

In [198]:
# Fight

df = pd.DataFrame(np.array(ser).reshape(7, 5))


In [199]:
# Output

df

Unnamed: 0,0,1,2,3,4
0,2,7,2,8,5
1,1,6,9,5,2
2,9,5,5,6,8
3,6,9,3,3,7
4,8,1,3,3,4
5,7,3,7,3,8
6,5,3,4,3,2


---

### Kombat 5: How to create a TimeSeries starting ‘2022-01-02’ and 10 weekends (sundays) after that, having random numbers as values?

In [200]:
# Input

time_series = pd.Series(np.random.randint(1, 10, 10))

In [201]:
# Fight
start_date = '2022-01-02'

indices = pd.date_range(start=start_date, periods=10, freq='W-SUN')

time_series = pd.Series(np.random.randint(1, 10, 10), index=indices)


In [202]:
# Output

time_series

2022-01-02    3
2022-01-09    2
2022-01-16    3
2022-01-23    1
2022-01-30    8
2022-02-06    6
2022-02-13    7
2022-02-20    8
2022-02-27    4
2022-03-06    3
Freq: W-SUN, dtype: int64

---

### Kombat 6: How to change column values when importing csv to a dataframe?

Import the boston housing dataset, but while importing change the 'medv' (median house value) column so that values < 25 becomes ‘Low’ and > 25 becomes ‘High’.

In [203]:
# Input

url = 'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv'

In [204]:
# Fight

df = pd.read_csv(url, converters = {'medv':lambda x: 'Low' if float(x) < 25 else 'High'})

In [205]:
# Solution

df

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,Low
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,Low
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,High
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,High
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,Low
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,Low
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,Low
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,Low


---

### Kombat 7: How to get the nrows, ncolumns, datatype, summary stats of each column of a dataframe?

Get the number of rows, columns, datatype and summary statistics of each column of the Cars93 dataset.

In [206]:
# Input

url = 'https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv'

In [207]:
#Fight

df = pd.read_csv(url)

nrows, ncols = df.shape

In [208]:
# Output
print(f"Number of rows: {nrows}")
print(f"Number of columns: {ncols}\n")

print("Data types:")
print(df.dtypes)
print('')

print("Data statistics of each column:")
df.describe()

Number of rows: 93
Number of columns: 27

Data types:
Manufacturer           object
Model                  object
Type                   object
Min.Price             float64
Price                 float64
Max.Price             float64
MPG.city              float64
MPG.highway           float64
AirBags                object
DriveTrain             object
Cylinders              object
EngineSize            float64
Horsepower            float64
RPM                   float64
Rev.per.mile          float64
Man.trans.avail        object
Fuel.tank.capacity    float64
Passengers            float64
Length                float64
Wheelbase             float64
Width                 float64
Turn.circle           float64
Rear.seat.room        float64
Luggage.room          float64
Weight                float64
Origin                 object
Make                   object
dtype: object

Data statistics of each column:


Unnamed: 0,Min.Price,Price,Max.Price,MPG.city,MPG.highway,EngineSize,Horsepower,RPM,Rev.per.mile,Fuel.tank.capacity,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight
count,86.0,91.0,88.0,84.0,91.0,91.0,86.0,90.0,87.0,85.0,91.0,89.0,92.0,87.0,88.0,89.0,74.0,86.0
mean,17.118605,19.616484,21.459091,22.404762,29.065934,2.658242,144.0,5276.666667,2355.0,16.683529,5.076923,182.865169,103.956522,69.448276,38.954545,27.853933,13.986486,3104.593023
std,8.82829,9.72428,10.696563,5.84152,5.370293,1.045845,53.455204,605.554811,486.916616,3.375748,1.045953,14.792651,6.856317,3.778023,3.304157,3.018129,3.120824,600.129993
min,6.7,7.4,7.9,15.0,20.0,1.0,55.0,3800.0,1320.0,9.2,2.0,141.0,90.0,60.0,32.0,19.0,6.0,1695.0
25%,10.825,12.35,14.575,18.0,26.0,1.8,100.75,4800.0,2017.5,14.5,4.0,174.0,98.0,67.0,36.0,26.0,12.0,2647.5
50%,14.6,17.7,19.15,21.0,28.0,2.3,140.0,5200.0,2360.0,16.5,5.0,181.0,103.0,69.0,39.0,27.5,14.0,3085.0
75%,20.25,23.5,24.825,25.0,31.0,3.25,170.0,5787.5,2565.0,19.0,6.0,192.0,110.0,72.0,42.0,30.0,16.0,3567.5
max,45.4,61.9,80.0,46.0,50.0,5.7,300.0,6500.0,3755.0,27.0,8.0,219.0,119.0,78.0,45.0,36.0,22.0,4105.0


---

### Kombat 8: How to slice a DataFrame by column value?

Get every Chevrolet car with a EngineSize lower than 3.0 from the Cars93 dataset.

In [209]:
# Input

url = 'https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv'

In [210]:
# Fight

df = pd.read_csv(url)
chevrolet_cars = df[(df['Manufacturer'] == 'Chevrolet') & (df['EngineSize'] < 3)]


In [211]:
# Output

chevrolet_cars

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Make
11,Chevrolet,Cavalier,Compact,8.5,13.4,18.3,25.0,36.0,,,...,5.0,182.0,101.0,66.0,38.0,25.0,13.0,2490.0,USA,Chevrolet Cavalier
12,Chevrolet,Corsica,Compact,11.4,11.4,11.4,25.0,34.0,Driver only,Front,...,5.0,184.0,103.0,68.0,39.0,26.0,,2785.0,USA,Chevrolet Corsica
14,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,,29.0,,Front,...,6.0,,108.0,71.0,,28.5,16.0,3195.0,USA,Chevrolet Lumina


---

# FINISH HIM!!!