## Create Input

In [None]:
%%writefile person_1.txt
Peter,25
Andrea,21
Urs,37

In [None]:
%%writefile person_2.txt
Monika,21
Andrea,23
Urs,37

## SELECTION

```bash
mysql> SELECT * FROM person_1 WHERE age > 22;
+-------+------+
| name  | age  |
+-------+------+
| Peter |   25 |
| Urs   |   37 |
+-------+------+
2 rows in set (0.00 sec)
```



In [None]:
%%writefile selection.py

#!/usr/bin/python3
from mrjob.job import MRJob

class MyJob(MRJob):
    """Map-only job mirroring ``SELECT * FROM person_1 WHERE age > 22``."""

    def mapper(self, _, line):
        """Emit ``(name, age)`` for every record whose age exceeds 22.

        Args:
            _: Input key; ignored.
            line: One input record of the form ``name,age``.

        Yields:
            ``(name, age)`` with ``age`` converted to ``int``.

        Raises:
            ValueError: If the line does not split into exactly two fields
                or the age field is not an integer.
        """
        person, raw_age = line.split(",")
        years = int(raw_age)
        # Guard clause: records at or below the threshold produce no output.
        if years <= 22:
            return
        yield person, years


# Launch the job only when this file is executed as a script.
if __name__ == '__main__':
    MyJob.run()

In [None]:
!python selection.py person_1.txt

## PROJECTION

```bash
mysql> SELECT name FROM person_1;
+--------+
| name   |
+--------+
| Peter  |
| Andrea |
| Urs    |
+--------+
3 rows in set (0.00 sec)
```

In [None]:
%%writefile projection.py

#!/usr/bin/python3
from mrjob.job import MRJob

class MyJob(MRJob):
    """Map-only job mirroring ``SELECT name FROM person_1``."""

    def mapper(self, _, line):
        """Emit the name column of a record and discard the age.

        Args:
            _: Input key; ignored.
            line: One input record of the form ``name,age``.

        Yields:
            ``(name, None)`` -- the value slot is deliberately left empty.

        Raises:
            ValueError: If the line does not split into exactly two fields.
        """
        fields = line.split(",")
        # Unpacking (rather than indexing) keeps the exact-two-fields check.
        person, _age = fields
        yield person, None


# Launch the job only when this file is executed as a script.
if __name__ == '__main__':
    MyJob.run()

In [None]:
!python projection.py person_1.txt

## UNION

```bash
mysql> SELECT * FROM person_1 UNION SELECT * FROM person_2;
+--------+------+
| name   | age  |
+--------+------+
| Peter  |   25 |
| Andrea |   21 |
| Urs    |   37 |
| Monika |   21 |
| Andrea |   23 |
+--------+------+
```

In [None]:
%%writefile union.py

#!/usr/bin/python3
from mrjob.job import MRJob

class MyJob(MRJob):
    """Job mirroring ``SELECT * FROM person_1 UNION SELECT * FROM person_2``.

    The whole record is used as the shuffle key, so identical records from
    either input reach the same reducer call and are emitted only once.
    """

    def mapper(self, _, line):
        """Emit the full record as the key with an empty value.

        Args:
            _: Input key; ignored.
            line: One input record of the form ``name,age``.

        Yields:
            ``([name, age], None)``; ``age`` stays a string.

        Raises:
            ValueError: If the line does not split into exactly two fields.
        """
        person, years = line.split(",")
        record = [person, years]
        yield record, None

    def reducer(self, key, _):
        """Emit each distinct record once as ``(name, age)``.

        Args:
            key: The ``[name, age]`` record produced by the mapper.
            _: Values grouped under the key; ignored.

        Yields:
            ``(name, age)`` for the distinct record.
        """
        yield key[0], key[1]


# Launch the job only when this file is executed as a script.
if __name__ == '__main__':
    MyJob.run()

In [None]:
!python union.py person_1.txt person_2.txt

## UNION ALL

```bash
mysql> SELECT * FROM person_1 UNION ALL SELECT * FROM person_2;
+--------+------+
| name   | age  |
+--------+------+
| Peter  |   25 |
| Andrea |   21 |
| Urs    |   37 |
| Monika |   21 |
| Andrea |   23 |
| Urs    |   37 |
+--------+------+
```

In [None]:
%%writefile union_all.py

#!/usr/bin/python3
from mrjob.job import MRJob

class MyJob(MRJob):
    """Map-only job mirroring ``SELECT ... UNION ALL SELECT ...``.

    There is no reducer, so duplicate records are passed through unchanged.
    """

    def mapper(self, _, line):
        """Emit every record as ``(name, age)`` without de-duplication.

        Args:
            _: Input key; ignored.
            line: One input record of the form ``name,age``.

        Yields:
            ``(name, age)``; ``age`` stays a string.

        Raises:
            ValueError: If the line does not split into exactly two fields.
        """
        fields = line.split(",")
        person, years = fields
        yield person, years


# Launch the job only when this file is executed as a script.
if __name__ == '__main__':
    MyJob.run()

In [None]:
!python union_all.py person_1.txt person_2.txt

## INNER JOIN

```bash
mysql> SELECT
    ->     p1.name as name, p1.age as p1_age, p2.age as p2_age
    -> FROM
    ->     person_1 AS p1
    -> INNER JOIN person_2 AS p2
    -> ON
    ->     p1.name = p2.name;
+--------+--------+--------+
| name   | p1_age | p2_age |
+--------+--------+--------+
| Andrea |     21 |     23 |
| Urs    |     37 |     37 |
+--------+--------+--------+
```

In [None]:
%%writefile inner_join.py

#!/usr/bin/python3
from mrjob.job import MRJob
import os

class MyJob(MRJob):
    """Reduce-side inner join of ``person_1`` and ``person_2`` on ``name``.

    The mapper tags each age with the table it came from; the reducer pairs
    every ``person_1`` row with every ``person_2`` row that shares the name.
    """

    def mapper(self, _, line):
        """Emit ``(name, {tag: age})`` where the tag records the source file.

        Args:
            _: Input key; ignored.
            line: One input record of the form ``name,age``.

        Yields:
            ``(name, {"p1_age": age})`` for lines read from a file whose path
            ends in ``person_1.txt``, otherwise ``(name, {"p2_age": age})``.

        Raises:
            KeyError: If ``mapreduce_map_input_file`` is not set in the
                environment.
            ValueError: If the line does not split into exactly two fields.
        """
        # Path of the file currently being mapped, read from the environment.
        # NOTE(review): mrjob.compat.jobconf_from_env('mapreduce.map.input.file')
        # appears to be the documented portable accessor -- confirm before
        # running on a cluster.
        file_name = os.environ['mapreduce_map_input_file']
        name, age = line.split(",")

        if file_name.endswith("person_1.txt"):
            yield name, {"p1_age": age}
        else:
            yield name, {"p2_age": age}

    def reducer(self, name, ages):
        """Emit one joined row per matching ``person_1`` / ``person_2`` pair.

        Counting the values is not a valid join test: two rows from the same
        table would look like a match, and a name with more than one row on
        either side would be dropped. The rows are therefore bucketed by the
        tag they carry and the cross product of both buckets is emitted.

        Args:
            name: The join key.
            ages: Iterable of ``{"p1_age": age}`` / ``{"p2_age": age}`` dicts.

        Yields:
            ``(name, [p1_row, p2_row])`` for every pair; nothing when either
            side is empty. The ``person_1`` row always comes first, so the
            output does not depend on the order in which values arrive.
        """
        p1_rows = []
        p2_rows = []
        for row in ages:
            if "p1_age" in row:
                p1_rows.append(row)
            else:
                p2_rows.append(row)

        for p1_row in p1_rows:
            for p2_row in p2_rows:
                yield name, [p1_row, p2_row]


# Launch the job only when this file is executed as a script.
if __name__ == '__main__':
    MyJob.run()

In [None]:
!python inner_join.py person_1.txt person_2.txt