In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell execution, so edits to local helper modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import random
from IPython.display import display
from tqdm import tqdm as progressbar
from collections import Counter, defaultdict

from dask.distributed import Client
from dask.distributed import progress
# Create a dask.distributed Client with default arguments. The same object is
# used further down for client.compute(), client.gather() and client.close().
client = Client()

import kvanti

# Directory prefix of the book text files, relative to the notebook; it is
# passed to kvanti.list_books() and concatenated with a glob pattern for
# kvanti.load_books(), hence the trailing slash.
BOOKPATH = '../nyata2017/docs/'

# Big Data a színfalak mögött

<b>Kvantitatív módszerek II. - VII. óra</b>, 2017. november 17.

# Python

> *A programozás a legközelebbi dolog a szupererőhöz.*  
> Drew Houston (Dropbox)

In [None]:
# Smoke test of the kernel. Uses the call form of print, which produces the
# same output under Python 2 (single argument) and is the only valid form
# under Python 3.
print("Hello Kvanti!")

# I. Big Data?

<img src="pics/meklogo.gif" source="http://mek.oszk.hu" align='left'>
## Big Data-e egy teljes könyvtár könyveinek elemzése?

In [None]:
# Ask kvanti.list_books for the book files under BOOKPATH. The result is
# indexed and sliced in the following cells, so it is a sequence of paths.
bookpaths = kvanti.list_books(BOOKPATH)
# Number of books found (displayed as the cell output).
len(bookpaths)

In [None]:
# Peek at the first book: show the first 140 characters of its first line.
with open(bookpaths[0]) as bookfile:
    # Call form of print: identical output on Python 2 for a single argument,
    # and required on Python 3 (the statement form is a SyntaxError there).
    print(bookfile.readline()[:140])

In [None]:
def _read_first_line(path):
    """Return the first line of the text file at ``path``."""
    with open(path) as handle:
        return handle.readline()


# First line of each of the first 1000 book files, read with a progress bar.
books = [_read_first_line(path) for path in progressbar(bookpaths[:1000])]

In [None]:
# Display kvanti.mean() of the loaded book strings.
# NOTE(review): presumably the mean text length - confirm in kvanti.mean.
kvanti.mean(books)

In [None]:
# Draw a histogram from the loaded book strings.
# NOTE(review): the plotted quantity is decided inside kvanti.plot_histogram
# (presumably text length) - confirm there.
kvanti.plot_histogram(books)

### Gyorselemzés: Melyek a MEK leggyakoribb szavai?

In [None]:
# Tally how often every lower-cased, whitespace-separated word occurs
# across the loaded books.
wordcount = defaultdict(int)

lowered_words = (
    word.lower()
    for book in progressbar(books)  # progress advances once per book
    for word in book.split()
)
for lowered in lowered_words:
    wordcount[lowered] += 1

kvanti.print_most_common(wordcount)

<img src="pics/census.jpg" width="100" source="http://www.budaorsiinfo.hu/wp-content/uploads/2011/09/Nepszamlalas2011_logo1.jpg" align='left'>
## Big Data-e a népszámlálás?

Átlagszámítás egy `1.000.000 x 500` mátrixon (500 millió adatpont, ~4 GB adat).

In [None]:
%%time
# Timed baseline: %%time reports how long the whole cell takes.
# Request a matrix of size 1,000,000 x 500 from kvanti.generate_random_matrix
# and take kvanti.mean() of it.
x = kvanti.generate_random_matrix(size=(1000000, 500))
y = kvanti.mean(x)

### Párhuzamosítás

<img src="pics/embarrassing.gif" source="https://github.com/dask/dask-tutorial" width="800px"/>

In [None]:
%%time
# Same size as the baseline cell, but generated with distribute=True.
x = kvanti.generate_random_matrix(size=(1000000, 500), distribute=True)
y = kvanti.mean(x)
# Here the mean is only evaluated on .compute().
# NOTE(review): presumably `y` is a lazy dask collection - confirm in kvanti.
y.compute()

### Skálázás

<img src="pics/verticalvshorizontal.png" source="http://www.pc-freak.net/blog/vertical-horizontal-server-services-scaling-vertical-horizontal-hardware-scaling/" />

<img src='pics/twitter-large.png' width="100" align="left">
## Big Data-e egy percnyi twitter feed?

## Hogyan dolgozzuk fel? - A MapReduce paradigma

<img src="pics/mapreduce.png" source="https://wikis.nyu.edu/display/NYUHPC/Big+Data+Tutorial+1%3A+MapReduce" width="750px"/>

In [None]:
# MapReduce-style word count over the book files matching '00*.txt'.
# The loaded collection is bound to its own name (`book_bag`) instead of
# overwriting `books`, the plain list of strings built earlier, so the
# earlier cells that read that list can still be re-run.
# NOTE(review): the .str/.flatten/.frequencies/.topk calls match the dask.bag
# API - confirm that kvanti.load_books returns a dask bag.
book_bag = kvanti.load_books(BOOKPATH + '00*.txt')
# Split: lower-case each text, split it on whitespace, flatten to one stream.
splitted = (book_bag.str.lower()
                    .str.split()
                    .flatten())
# Map: count the occurrences of every distinct word.
mapped = splitted.frequencies()
# Reduce: keep the 10 entries with the largest x[1] (the count).
reduced = mapped.topk(10, lambda x: x[1])

In [None]:
# Submit the `reduced` graph through the client; compute() hands back a future
# rather than the finished result.
# NOTE(review): this rebinds `wordcount`, which an earlier cell used for the
# word -> count dict.
wordcount = client.compute(reduced)
# Show a progress display for the running future.
progress(wordcount)

In [None]:
# Fetch the concrete result of the future and print it.
results = client.gather(wordcount)
kvanti.print_most_common(results)
# Shut the client down.
# NOTE(review): re-running any cell that uses `client` afterwards presumably
# requires re-creating it first.
client.close()

## Miért jók a Big Data megoldások?

- elosztott
- dinamikusan skálázható
- hibatűrő

## Alternatívák Big Data feldolgozására

<img src="pics/solutions.jpg" source1="http://www.tomsitpro.com/articles/mesos-mesosphere-data-center-open-source-apache,1-2001.html" source2="https://www.slideshare.net/PowerPoint-Templates/computers-and-servers-powerpoint-presentation-slides-ppt-templates" width="600">


# II. Kitérő

## Kő-papír-olló
<img src="pics/rock.gif" align='left'/>
<img src="pics/paper.gif" align='left'/>
<img src="pics/scissors.gif" align='left'/>

<img src="pics/janken.gif" />

In [None]:
# The three possible hands: 'ko' (rock), 'papir' (paper), 'ollo' (scissors).
hands = ['ko', 'papir', 'ollo']
# Show one uniformly random pick.
random.choice(hands)

In [None]:
# Draw 10000 independent random hands and plot how often each one came up.
games = [random.choice(hands) for _ in range(10000)]

hand_counts = Counter(games)
kvanti.plot_freqs(hand_counts);

## Jósolható a taktikánk?

<img src="pics/rock.gif" align='left'/>
<img src="pics/paper.gif" align='left'/>
<img src="pics/paper.gif" align='left'/>
<img src="pics/scissors.gif" align='left'/>
<img src="pics/rock.gif" align='left'/>
<img src="pics/rock.gif" align='left'/>
<img src="pics/rock.gif" align='left'/>
<img src="pics/paper.gif" align='left'/>
<img src="pics/scissors.gif" align='left'/>
<img src="pics/rock.gif" align='left'/>
<img src="pics/paper.gif" align='left'/>
<img src="pics/scissors.gif" align='left'/>
<img src="pics/paper.gif" align='left'/>
<img src="pics/paper.gif" align='left'/>
<img src="pics/rock.gif" align='left'/>
<img src="pics/rock.gif" align='left'/>
<img src="pics/scissors.gif" align='left'/>

In [None]:
# The 17-move sequence shown in the images above, one letter per hand:
# k = 'ko' (rock), p = 'papir' (paper), o = 'ollo' (scissors).
my_play = 'kppokkkpokpoppkko'

## Véletlenszerű MI

In [None]:
# Build an RPS agent of kind 'null'.
# NOTE(review): 42 is presumably the random seed - confirm against kvanti.RPS.
rnd = kvanti.RPS('null', 42)
# Feed it the recorded human sequence and request a plot; the trailing ';'
# hides the returned value in the cell output.
rnd.play(my_play, plot=True);

In [None]:
# Plot via the agent's plot_probs() (presumably its move probabilities -
# confirm in kvanti); ';' suppresses the return-value repr.
rnd.plot_probs();

## Naiv MI

A gyakrabban használt kezeket jegyezzük meg

In [None]:
# Build an RPS agent of kind 'naive'.
# NOTE(review): 42 is presumably the random seed - confirm against kvanti.RPS.
naive = kvanti.RPS('naive', 42)
# Play it against the recorded human sequence and request a plot.
naive.play(my_play, plot=True);

In [None]:
# Plot via the naive agent's plot_probs() (presumably its move
# probabilities - confirm in kvanti); ';' suppresses the return-value repr.
naive.plot_probs();

## Emlékező MI

Jegyezzük meg, hogy milyen kezek után mik következnek

<img src="pics/stateful.png" width="400" align='left'>

In [None]:
# Two agents of kind 'stateful'; the second one gets an extra argument 3.
# NOTE(review): 42 is presumably the seed and 3 presumably the number of
# remembered moves - confirm both against kvanti.RPS.
stateful = kvanti.RPS('stateful', 42)
stateful3 = kvanti.RPS('stateful', 42, 3)

In [None]:
# Run both stateful agents on the recorded sequence. Only the second call
# requests a plot; the first call's return value is not displayed because it
# is not the last expression of the cell.
stateful3.play(my_play)
stateful.play(my_play, plot=True);

In [None]:
# Plot via the stateful agent's plot_probs() (presumably its move
# probabilities - confirm in kvanti); ';' suppresses the return-value repr.
stateful.plot_probs();

## Gépek csatája

In [None]:
# Let the two stateful agents play each other and plot the outcome.
# NOTE(review): 50000 is presumably the number of rounds and p1static/p2static
# presumably toggle learning per player - confirm against kvanti.Simulate.
battle = kvanti.Simulate(
    stateful, stateful3, 50000,
    p1static=True, p2static=False
)
battle.play(plot=True);

# III. Lehetőségek

## Neurális hálózatok

### Perceptron modell
<img src="pics/neuron.png" width="400" align="left">

## Többrétegű hálózat
<img src="pics/mlp.png" width="400" align="left" source="https://github.com/nikolaypavlov/MLPNeuralNet">

## Mélytanulás

<img src="pics/mgc.gif"/>

<img src="pics/deepdream.jpg" source="https://artofericwayne.com/2015/07/08/google-deep-dream-getting-too-good/"/>

<img src="pics/deeprebrandt.jpg" source="http://www.wired.co.uk/article/new-rembrandt-painting-computer-3d-printed"/>

<img src="pics/deeptransform.jpg" source="https://deepart.io/"/>

> *PANDARUS:*  
> *Alas, I think he shall be come approached and the day*  
> *When little srain would be attain'd into being never fed,*  
> *And who is but a chain and subjects of his death,*  
> *I should not sleep.*  
> ...  
>   
> *Clown:*  
> *Come, sir, I will make did behold your worship.*  
>   
> *VIOLA:*  
> *I'll drink it.*  

\- William RNNspear

<img src="pics/deeppaper.jpg" source="http://karpathy.github.io/2015/05/21/rnn-effectiveness/">

<img src="pics/w2v-context-words.png" source="https://blog.acolyer.org/2016/04/21/the-amazing-power-of-word-vectors/">

<img src="pics/w2v-king-queen-vectors.png" source="https://blog.acolyer.org/2016/04/21/the-amazing-power-of-word-vectors/" align="left" width="400px">

<img src="pics/w2v-king-queen-composition.png" source="https://blog.acolyer.org/2016/04/21/the-amazing-power-of-word-vectors/" align="right" width="400px">

<img src="pics/gan_cats.gif" source="https://github.com/AlexiaJM/Deep-learning-with-cats">

<img src="pics/DLmario.gif" paper="http://nn.cs.utexas.edu/downloads/papers/stanley.ec02.pdf" source-code="https://pastebin.com/ZZmSNaHX" source="https://www.youtube.com/watch?v=qv6UVOQ0F44">

<img src="pics/alphagowins.jpg" align="left" source="https://gogameguru.com/lee-sedol-defeats-alphago-masterful-comeback-game-4/">
<img src="pics/DLagentloses.jpg" align="right" source="http://www.dailymail.co.uk/sciencetech/article-3978068/DeepMind-s-challenge-StarCraft-2-Google-s-secretive-AI-playing-game-develop-human-like-reasoning.html" related-article="https://www.technologyreview.com/s/609242/humans-are-still-better-than-ai-at-starcraftfor-now/">

<img src="pics/inspiroquotes.jpg" source="http://inspirobot.me/">

## Neurális MI

In [None]:
# Build an RPS agent of kind 'neural'.
# NOTE(review): 42 is presumably the random seed - confirm against kvanti.RPS.
nn = kvanti.RPS('neural', 42)
# Play it against the recorded human sequence and request a plot.
nn.play(my_play, plot=True);

# IV. Demo

In [None]:
# Render the object built by kvanti.generate_interface for the neural agent.
# NOTE(review): presumably an interactive widget for playing against `nn` -
# confirm in kvanti.
display(kvanti.generate_interface(nn))

In [None]:
# Plot via the neural agent's plot_win_ratio() (presumably its win ratio
# over the games played so far - confirm in kvanti); ';' suppresses the repr.
nn.plot_win_ratio();

# V. Ajánló rendszerek

<img src="pics/logos.png">

## Collaborative filtering

<img src="pics/cfinteract.png" />

<img src="pics/cfmatrix.png" />

## Tartalom alapú módszerek 

<img src="pics/cbinteract.png" />

# VI. Kérdések

- Hol lehet hasznos a Big Data a társadalomtudományokban?

- Soroljunk fel Big Data problémákat!

- Mire nem alkalmasak a Big Data módszerek?  

- Melyek nem Big Data problémák?

# Köszönöm a megtisztelő figyelmet!

**Források:**

- Python nyelv kezdőknek: http://mek.oszk.hu/08400/08435/08435.pdf
- Deep learning:
    - Deep Dream: https://artofericwayne.com/2015/07/08/google-deep-dream-getting-too-good/
    - Rembrandt 3d nyomtatás: http://www.wired.co.uk/article/new-rembrandt-painting-computer-3d-printed
    - Képek átalakítása festői stílusokban: https://deepart.io/
    - Hogyan értik meg a neurális hálózatok a képeket: https://distill.pub/2017/feature-visualization/
    - Generatív nyelvek: http://karpathy.github.io/2015/05/21/rnn-effectiveness/
    - word2vec: 
        - általános leírás: https://blog.acolyer.org/2016/04/21/the-amazing-power-of-word-vectors/
        - kipróbálható online alkalmazás:  https://rare-technologies.com/word2vec-tutorial/
        - tutorial:  http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/
    - Deep Learning cats: https://github.com/AlexiaJM/Deep-learning-with-cats
    - MarI/O: https://www.youtube.com/watch?v=qv6UVOQ0F44
    - Mario Kart neurális hálózat: https://www.youtube.com/watch?v=Ipi40cb_RsI
    - AlphaGo wins: https://gogameguru.com/lee-sedol-defeats-alphago-masterful-comeback-game-4/
    - Deep Learning agent loses in StarCraft: https://www.technologyreview.com/s/609242/humans-are-still-better-than-ai-at-starcraftfor-now/
    - Inspirobot: http://inspirobot.me/
- Neurális Kő-Papír-Olló ágensek:
    - https://www.his.se/PageFiles/8158/Henrik_Engstrom.pdf
    - http://act-r.psy.cmu.edu/wordpress/wp-content/uploads/2012/12/874paper203.pdf
- Hogyan nyerjünk emberi ellenfél ellen: 
    - http://worldrps.com/how-to-beat-anyone-at-rock-paper-scissors/
    - https://www.youtube.com/watch?v=rudzYPHuewc
    - http://bit.ly/RPSpaper
    - http://blog.wolfram.com/2014/01/20/how-to-win-at-rock-paper-scissors/
- RoShamBo - Kő-Papír-Olló MI bajnokság:
    - https://github.com/NavjotMinhas/RockPaperScissors_AI
    - https://www.reddit.com/r/gamedev/comments/doxb3/simple_ai_for_a_rockpaperscissors_game/
    - http://webdocs.cs.ualberta.ca/~darse/rsbpc.html