In [17]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LogisticRegression

In [27]:
from sklearn.model_selection import train_test_split

In [2]:
# Switch to the directory holding titanic.csv and pokemon.csv.
# The DATASETS_DIR environment variable overrides the machine-specific default
# below, so the notebook can run on another machine without editing this cell.
os.chdir(os.environ.get('DATASETS_DIR', r'C:\Users\dell\PycharmProjects\MachineLearning\Pandas\datasets'))

In [12]:
# Load both example datasets from the current working directory.
titanic = pd.read_csv(filepath_or_buffer='./titanic.csv')
pokemon = pd.read_csv(filepath_or_buffer='./pokemon.csv')

# Preview the first three Titanic rows.
titanic.head(n=3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


# Table-wise

## pipe

```python
DataFrame.pipe(func, *args, **kwargs)
```

<pre>
Instead of writing

>>> f(g(h(df), arg1=a), arg2=b, arg3=c)

You can write

>>> (df.pipe(h)
...    .pipe(g, arg1=a)
...    .pipe(f, arg2=b, arg3=c)
... )
</pre>

In [8]:
def softplus(z):
    """Return ``log(1 + z)`` element-wise.

    NOTE: despite the name, this is not the usual softplus
    ``log(1 + exp(z))``; the formula is kept as written in this notebook.
    ``np.log1p`` is used because it stays accurate when ``z`` is close to
    zero, where ``np.log(z + 1)`` loses precision.
    """
    return np.log1p(z)


def linear(x, w):
    """Scale ``x`` by the weight ``w``."""
    return w * x


# Small 2x2 float frame used to demonstrate DataFrame.pipe.
values = pd.DataFrame([[1, 2], [3, 4]], dtype=float)
values

Unnamed: 0,0,1
0,1.0,2.0
1,3.0,4.0


In [10]:
# Nested-call form: linear is applied first, then softplus wraps its result.
softplus(linear(values, 1))

Unnamed: 0,0,1
0,0.693147,1.098612
1,1.386294,1.609438


In [11]:
# Equivalent pipe chain: the calls read left to right in the order they are applied.
values.pipe(linear, w = 1).pipe(softplus)

Unnamed: 0,0,1
0,0.693147,1.098612
1,1.386294,1.609438


# Row-wise and Column-wise

## apply

In [13]:
# Preview the first three rows of the Pokemon dataset.
pokemon.head(3)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False


In [15]:
# Select the numeric feature columns with a label slice. With .loc both
# endpoints are included and labels are case-sensitive, so the end label must
# be an existing column name; 'Generation' is the last of the six columns.
stats = pokemon.loc[:, 'Attack':'Generation']
stats.head(3)

Unnamed: 0,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
0,49,49,65,65,45,1
1,62,63,80,80,60,1
2,82,83,100,100,80,1


In [24]:
def _standardize(column):
    """Return one column as z-scores: (value - mean) / standard deviation."""
    centered = column - column.mean()
    return centered / column.std()


# Column-wise apply (the default axis): the function receives one column at a time.
X = stats.apply(_standardize)
X.head(3)

Unnamed: 0,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
0,-0.924328,-0.796655,-0.238981,-0.248033,-0.801002,-1.398762
1,-0.523803,-0.3477,0.219422,0.290974,-0.284837,-1.398762
2,0.09239,0.293665,0.830626,1.009651,0.403383,-1.398762


In [28]:
# Series.apply calls the function once per element; here it encodes the
# Legendary flag as 1 (truthy) or 0 (falsy).
y = pokemon['Legendary'].apply(lambda flag: int(bool(flag)))
y.head(3)

0    0
1    0
2    0
Name: Legendary, dtype: int64

In [32]:
# Split features and target into train and test subsets; the fixed seed keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=7,
)

In [33]:
# Fit a logistic-regression classifier with default settings on the training split.
# The fit call is left as the last expression so the cell displays the estimator.
model = LogisticRegression()
model.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [38]:
# Predict labels for the held-out rows, then display the model's score on them.
y_pred = model.predict(X_test)
model.score(X_test, y_test)

0.925

In [47]:
# First row of the fitted coefficient matrix: one weight per feature column.
coef = model.coef_[0]
coef

array([0.72692411, 0.21761582, 0.8521094 , 1.11267587, 0.79852306,
       0.54736479])

In [48]:
# First entry of the fitted intercept array: the bias added to the weighted sum.
intercept = model.intercept_[0]
intercept

-4.4806381540328015

Let's compute the predicted probabilities for the test set by hand, using the fitted coefficients and intercept.

In [51]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``.

    Works on scalars and element-wise on NumPy arrays. For very negative
    ``x``, ``exp(-x)`` overflows to ``inf`` and the result becomes 0.0, which
    is the correct limit, so the overflow warning is suppressed.
    """
    with np.errstate(over='ignore'):
        return 1 / (1 + np.exp(-x))

In [57]:
def _legendary_probability(row):
    """Return the sigmoid of the row's weighted feature sum plus the intercept."""
    score = row.to_numpy() @ coef + intercept
    return sigmoid(score)


# Row-wise apply (axis=1): the function receives one row at a time.
legendary_pred = X_test.apply(_legendary_probability, axis=1)
legendary_pred

61     0.000548
799    0.420049
747    0.064906
668    0.000634
726    0.256871
110    0.000258
389    0.026671
464    0.013220
34     0.000124
244    0.054164
271    0.173491
283    0.355095
731    0.111733
480    0.021806
300    0.011420
772    0.036594
142    0.017028
256    0.021350
620    0.003460
277    0.006224
236    0.000166
750    0.219515
776    0.778590
477    0.166643
289    0.000034
665    0.081689
586    0.001622
518    0.220902
140    0.045611
478    0.036862
         ...   
70     0.105918
722    0.025178
698    0.354022
358    0.002136
523    0.032021
351    0.004505
739    0.630477
702    0.427408
186    0.000138
749    0.007191
471    0.041956
318    0.001179
607    0.070748
171    0.080296
505    0.018683
764    0.001989
547    0.033581
511    0.260997
532    0.166346
22     0.011361
650    0.057659
625    0.001344
2      0.049177
765    0.206822
557    0.000979
609    0.065565
686    0.044016
542    0.332902
115    0.020284
515    0.011931
Length: 200, dtype: float64

In [63]:
# Build a side-by-side report on a copy of the test features: the hand-computed
# probability, the label obtained by thresholding it at 0.5, the model's own
# prediction, and the true label.
report = X_test.assign(**{
    'probability': legendary_pred,
    'predict target': legendary_pred.apply(lambda prob: 0 if prob < .5 else 1),
    'model predict': model.predict(X_test),
    'true target': y_test,
})
report.head(3)

Unnamed: 0,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,probability,predict target,model predict,true target
61,0.030771,-1.245611,-1.155787,-0.96671,0.059273,-1.398762,0.000548,0,0,0
799,0.955061,1.48019,1.747432,0.650313,0.059273,1.610947,0.420049,0,0,1
747,-0.955138,0.069187,0.311103,0.326908,1.229247,1.610947,0.064906,0,0,0


In [64]:
# Keep only the test rows whose true label is 1 (legendary).
report.loc[report['true target'] == 1]

Unnamed: 0,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,probability,predict target,model predict,true target
799,0.955061,1.48019,1.747432,0.650313,0.059273,1.610947,0.420049,0,0,1
553,0.646964,0.838825,0.830626,1.009651,1.091603,1.009005,0.360628,0,0,1
418,0.646964,1.48019,2.053035,2.806344,1.435713,-0.194879,0.902341,1,1,1
420,1.571254,0.838825,2.664239,1.728328,1.435713,-0.194879,0.888663,1,1,1
549,0.338868,0.518143,1.900234,0.650313,1.951878,0.407063,0.500669,1,1,1
699,0.338868,1.768804,0.525024,0.003504,1.366891,1.009005,0.147503,0,0,1
423,2.187446,2.121555,0.830626,0.650313,0.747493,-0.194879,0.375837,0,0,1
430,-0.277325,2.762919,-0.08618,3.165682,0.747493,-0.194879,0.464606,0,0,1
431,0.492916,0.518143,0.677825,0.650313,3.844483,-0.194879,0.563367,1,1,1
797,0.955061,-0.443905,2.358637,2.087667,0.059273,1.610947,0.798804,1,1,1


# Element-wise

## DataFrame

<b><code>applymap</code></b>

```python
DataFrame.applymap(func)
```

Map each value to $e^{\sqrt{value}}$.

In [72]:
def _exp_sqrt(value):
    """Return e raised to the square root of ``value``."""
    return np.exp(np.sqrt(value))


# Element-wise: applymap calls the function on every individual cell.
# NOTE(review): pandas 2.1+ deprecates applymap in favour of DataFrame.map;
# switch once the minimum supported pandas version allows it.
stats_mapped = stats.applymap(_exp_sqrt)
stats_mapped.head(3)

Unnamed: 0,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
0,1096.633158,1096.633158,3172.444636,3172.444636,819.098162,2.718282
1,2628.077512,2799.661884,7663.866574,7663.866574,2312.227657,2.718282
2,8564.535168,9049.217593,22026.465795,22026.465795,7663.866574,2.718282


## Series

<b><code>apply</code>, <code>map</code></b>

In [79]:
# Series.apply: convert each flag with int() and show the first five results.
pokemon['Legendary'].apply(int).head()

0    0
1    0
2    0
3    0
4    0
Name: Legendary, dtype: int64

In [80]:
# Series.map with a function: each element is mapped to 1 if truthy, else 0.
pokemon['Legendary'].map(lambda v: 1 if v else 0).head()

0    0
1    0
2    0
3    0
4    0
Name: Legendary, dtype: int64