In [3]:
from random import normalvariate

from faker import Faker
from texttable import Texttable

# Shared fake-data generator; Employee pulls its name and email from it.
faker = Faker()

In [8]:
def print_table(headers, rows):
    """Print ``headers`` followed by ``rows`` as a Texttable-drawn table.

    ``headers`` is one row of column titles; ``rows`` is a list of data rows.
    """
    renderer = Texttable()
    content = [headers] + rows
    renderer.add_rows(content)
    rendered = renderer.draw()
    print(rendered)

In [34]:
class Employee:
    """A randomly generated employee with a name, an email and a salary."""

    def __init__(self):
        self.name = faker.name()
        self.email = faker.email()

        # Draw from a normal distribution (mean 2000, std-dev 2000) and
        # never let the result drop below 1000 before truncating to int.
        drawn = normalvariate(2000, 2000)
        floored = drawn if drawn > 1000 else 1000
        self.salary = int(floored)

In [35]:
# Write a header line plus 1000 generated employees, one record per line.
with open('./employees.csv', 'w') as out:
    out.write('name,email,salary\n')
    staff = (Employee() for _ in range(1000))
    out.writelines(
        f'{person.name},{person.email},{person.salary}\n' for person in staff
    )

In [1]:
# Shell escape: show the first 10 lines of the generated file.
!head -n 10 ./employees.csv

name,email,salary
Hedwig Collier,leathaluettgen@hotmail.com,4725
Agness Gislason,faheykem@volkman.org,3528
Seaborn Cruickshank,owiza@daniel-wiegand.com,4248
Mr. Nat Williamson,voconnell@oberbrunner.org,2748
Alexis Mayer MD,xwilderman@gmail.com,1000
Latonia Dietrich,welcheugenie@yahoo.com,1000
Brittanie Jones,will80@hotmail.com,1000
Colbert Keebler,dameon15@corwin.org,1185
Mrs. Patsy Smitham,alivia49@hagenes-koelpin.info,1000


## Select all with simple filter

```sql
SELECT 
    * 
FROM 
    employees 
WHERE 
    salary > 2000 
```

In [9]:
# Read the CSV by hand and keep only the rows whose salary exceeds 2000.
with open('./employees.csv', 'r') as f:
    # readline() keeps the trailing '\n'; strip it so the last header cell
    # is 'salary' rather than 'salary\n' (which renders as an extra blank
    # line in the table header).
    headers = f.readline().strip().split(',')

    rows = []
    for raw in f:
        raw = raw.strip()
        if not raw:
            # A blank line holds no record and would break the unpacking below.
            continue

        name, email, salary = raw.split(',')

        if int(salary) > 2000:
            rows.append([name, email, salary])

print_table(headers, rows)

+-------------------------------+-------------------------------------+--------+
|             name              |                email                | salary |
|                               |                                     |        |
| Hedwig Collier                | leathaluettgen@hotmail.com          | 4725   |
+-------------------------------+-------------------------------------+--------+
| Agness Gislason               | faheykem@volkman.org                | 3528   |
+-------------------------------+-------------------------------------+--------+
| Seaborn Cruickshank           | owiza@daniel-wiegand.com            | 4248   |
+-------------------------------+-------------------------------------+--------+
| Mr. Nat Williamson            | voconnell@oberbrunner.org           | 2748   |
+-------------------------------+-------------------------------------+--------+
| Shawna Deckow                 | ryancleveland@botsford-hauck.com    | 2285   |
+---------------------------

## Select all with universal filter

```sql
SELECT 
    * 
FROM 
    employees 
WHERE 
    salary > 2000 AND salary < 4000
```

In [12]:
def where(condition, filepath='./employees.csv'):
    """Print the rows of a name/email/salary CSV file that satisfy ``condition``.

    Args:
        condition: Callable that receives one row as a dict with the keys
            ``'name'`` (str), ``'email'`` (str) and ``'salary'`` (int) and
            returns a truthy value for rows that should be kept.
        filepath: Path of the CSV file to read. Defaults to the employees
            file used throughout this notebook.
    """
    with open(filepath, 'r') as f:
        # readline() keeps the trailing '\n'; strip it so the last header
        # cell does not render as a two-line header in the table.
        headers = f.readline().strip().split(',')

        rows = []
        for raw in f:
            raw = raw.strip()
            if not raw:
                # A blank line holds no record and would break the unpacking.
                continue

            name, email, salary = raw.split(',')

            if condition(dict(name=name, email=email, salary=int(salary))):
                rows.append([name, email, salary])

    print_table(headers, rows)
    
# Keep salaries strictly between 2000 and 4000.
where(lambda row: 2000 < row['salary'] < 4000)

+-------------------------------+-------------------------------------+--------+
|             name              |                email                | salary |
|                               |                                     |        |
| Agness Gislason               | faheykem@volkman.org                | 3528   |
+-------------------------------+-------------------------------------+--------+
| Mr. Nat Williamson            | voconnell@oberbrunner.org           | 2748   |
+-------------------------------+-------------------------------------+--------+
| Shawna Deckow                 | ryancleveland@botsford-hauck.com    | 2285   |
+-------------------------------+-------------------------------------+--------+
| Bettyjane Miller              | rgibson@vonrueden.com               | 3777   |
+-------------------------------+-------------------------------------+--------+
| Benedict Conn                 | selma82@ratke.com                   | 2344   |
+---------------------------

## Complex query

```sql
SELECT 
    name,
    salary
FROM 
    employees 
WHERE 
    name LIKE 'B%'
ORDER BY salary DESC
LIMIT 5
```

In [53]:
# Names starting with 'B', ordered by salary descending, first five rows only.
b_names_query = (
    Table('employees.csv')
    .select('name', 'salary')
    .where(lambda row: row['name'].startswith('B'))
    .order_by('salary', asc=False)
    .limit(5)
)
b_names_query.collect()

+--------------------+--------+
|        name        | salary |
| Brittnee Miller    | 5483   |
+--------------------+--------+
| Burt Wiza          | 5332   |
+--------------------+--------+
| Blanca Runolfsson  | 4682   |
+--------------------+--------+
| Berniece Muller MD | 4128   |
+--------------------+--------+
| Bolden Keeling     | 3856   |
+--------------------+--------+


In [52]:
class Table:
    """Minimal in-memory, chainable query helper over a comma-separated file.

    The first line of the file supplies the column names; every following
    non-blank line becomes a dict mapping column name to the raw string value.
    Each query method mutates this instance and returns it so that calls can
    be chained; ``collect`` prints the current result.
    """

    def __init__(self, filepath):
        """Load ``filepath`` into ``self.columns`` and ``self.rows``."""
        with open(filepath, 'r') as f:
            self.columns = f.readline().strip().split(',')
            self.rows = []

            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing empty line) hold no record.
                    continue

                entries = line.split(',')

                self.rows.append({
                    column: entries[i]
                    for i, column in enumerate(self.columns)
                })

    @staticmethod
    def _sort_key(value):
        """Build a sort key that orders numeric-looking strings by value.

        Values are read from the file as strings, so comparing them directly
        would put '10000' before '999'. Numbers get the key ``(0, number)``
        and everything else ``(1, value)``, so numbers sort before text and
        the two kinds are never compared with each other.
        """
        for convert in (int, float):
            try:
                number = convert(value)
            except (TypeError, ValueError):
                continue
            if number == number:  # NaN has no ordering; treat it as text
                return (0, number)
        return (1, value)

    def select(self, *columns):
        """Restrict the columns shown by ``collect`` to ``columns``, in order."""
        self.columns = columns

        return self

    def limit(self, limit):
        """Keep only the first ``limit`` rows."""
        self.rows = self.rows[:limit]

        return self

    def offset(self, offset):
        """Drop the first ``offset`` rows."""
        self.rows = self.rows[offset:]

        return self

    def order_by(self, column, asc=True):
        """Sort rows by ``column``; ascending unless ``asc`` is false.

        Numeric-looking values are compared as numbers, other values as text.
        """
        self.rows = sorted(
            self.rows,
            key=lambda row: self._sort_key(row[column]),
            reverse=not asc,
        )

        return self

    def where(self, condition):
        """Keep only the rows for which ``condition(row)`` is truthy."""
        self.rows = [row for row in self.rows if condition(row)]

        return self

    def collect(self):
        """Print the selected columns of the current rows as a table."""
        print_table(
            self.columns,
            [[row[column] for column in self.columns] for row in self.rows])

In [46]:
"""
terms:
    contextmanager
    class
    constructor
    lambda
    *args
    self
    slicing
    list comprehensions
    list
    dictionary comprehensions
    dictionary
    enumerate & iterables
    function
    io / readline(s)
    strip / split --> strings in python
    chain pattern
    random --> std library
    faker --> 3rd party
    texttable --> 3rd party
"""

In [None]:
BRICKS:
- which terms I should know in Big Data --> answer honestly --> all answers should be true --> 
I will use it as a quick review to understand how people answered
- big data terms
- python stuff 
- calculation stuff --> to train them in in-memory calculations
