# Building Fast Queries on a CSV

## The Dataset

Here is a brief description of the rows:

- ID: A unique identifier for the laptop.
- Company: The name of the company that produces the laptop.
- Product: The name of the laptop.
- TypeName: The type of laptop.
- Inches: The size of the screen in inches.
- ScreenResolution: The resolution of the screen.
- CPU: The laptop CPU.
- RAM: The amount of RAM in the laptop.
- Memory: The size of the hard drive.
- GPU: The graphics card name.
- OpSys: The name of the operating system.
- Weight: The laptop weight.
- Price: The price of the laptop.

In [1]:
import csv

In [2]:
with open('./laptops.csv', encoding = 'UTF-8') as my_file:
    csv_file = csv.reader(my_file)
    laptops = list(csv_file)
    header = laptops[0]
    rows = laptops[1:]
    print(header)
    print(rows[:5])

['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']
[['6571244', 'Apple', 'MacBook Pro', 'Ultrabook', '13.3', 'IPS Panel Retina Display 2560x1600', 'Intel Core i5 2.3GHz', '8GB', '128GB SSD', 'Intel Iris Plus Graphics 640', 'macOS', '1.37kg', '1339'], ['7287764', 'Apple', 'Macbook Air', 'Ultrabook', '13.3', '1440x900', 'Intel Core i5 1.8GHz', '8GB', '128GB Flash Storage', 'Intel HD Graphics 6000', 'macOS', '1.34kg', '898'], ['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', '575'], ['9722156', 'Apple', 'MacBook Pro', 'Ultrabook', '15.4', 'IPS Panel Retina Display 2880x1800', 'Intel Core i7 2.7GHz', '16GB', '512GB SSD', 'AMD Radeon Pro 455', 'macOS', '1.83kg', '2537'], ['8550527', 'Apple', 'MacBook Pro', 'Ultrabook', '13.3', 'IPS Panel Retina Display 2560x1600', 'Intel Core i5 3.1GHz', '8GB', '256GB 

## Inventory Class

In [3]:
class Inventory:
    def __init__(self, csv_filename):
        with open(csv_filename) as my_file:
            csv_file = csv.reader(my_file)
            laptops = list(csv_file)
            self.header = laptops[0]
            self.rows = laptops[1:]
        for i in range(len(self.rows)):
            self.rows[i][-1] = int(self.rows[i][-1])

In [4]:
inventory = Inventory('./laptops.csv')

In [5]:
inventory.header

['Id',
 'Company',
 'Product',
 'TypeName',
 'Inches',
 'ScreenResolution',
 'Cpu',
 'Ram',
 'Memory',
 'Gpu',
 'OpSys',
 'Weight',
 'Price']

In [6]:
len(inventory.rows)

1303

## Finding a Laptop From the Id

In [7]:
class Inventory:
    def __init__(self, csv_filename):
        with open(csv_filename) as my_file:
            csv_file = csv.reader(my_file)
            laptops = list(csv_file)
            self.header = laptops[0]
            self.rows = laptops[1:]
        for i in range(len(self.rows)):
            self.rows[i][-1] = int(self.rows[i][-1])

In [8]:
class Inventory:
    def __init__(self, csv_filename):
        with open(csv_filename) as my_file:
            csv_file = csv.reader(my_file)
            laptops = list(csv_file)
            self.header = laptops[0]
            self.rows = laptops[1:]
        for i in range(len(self.rows)):
            self.rows[i][-1] = int(self.rows[i][-1])
    def get_laptop_from_id(self, laptop_id):
        laptop_found = None
        for row in rows:
            if row[0] == laptop_id:
                laptop_found = row
        if laptop_found is None:
            print('Laptop does not exist')
        else:
            print(laptop_found)
                

In [9]:
inventory = Inventory('./laptops.csv')

In [10]:
inventory.get_laptop_from_id('3362736')

Laptop does not exist


## Improving Id Lookups

In [11]:
class Inventory:
    def __init__(self, csv_filename):
        with open(csv_filename) as my_file:
            csv_file = csv.reader(my_file)
            laptops = list(csv_file)
            self.header = laptops[0]
            self.rows = laptops[1:]
            
        for i in range(len(self.rows)):
            self.rows[i][-1] = int(self.rows[i][-1])
        self.id_to_row = {}
        for row in rows:
            self.id_to_row[row[0]] = row[1:]
    def get_laptop_from_id(self, laptop_id):
        laptop_found = None
        for row in rows:
            if row[0] == laptop_id:
                laptop_found = row
        if laptop_found is None:
            return None
        else:
            return laptop_found
        
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        else:
            return None
        
        

In [12]:
inventory = Inventory('./laptops.csv')

In [13]:
inventory.get_laptop_from_id('3362737')

['3362737',
 'HP',
 '250 G6',
 'Notebook',
 '15.6',
 'Full HD 1920x1080',
 'Intel Core i5 7200U 2.5GHz',
 '8GB',
 '256GB SSD',
 'Intel HD Graphics 620',
 'No OS',
 '1.86kg',
 '575']

In [14]:
inventory.get_laptop_from_id('3362736')

## Comparing the Performance

In [15]:
import time
import random
ids = [str(random.randint(1000000, 9999999)) for i in range(10000)]

In [16]:
inventory = Inventory('./laptops.csv')

In [17]:
total_time_no_dict = 0
for i in ids:
    start = time.time()
    inventory.get_laptop_from_id(i)
    end = time.time()
    total_time_no_dict += end - start

print(total_time_no_dict)
    

1.6862046718597412


In [18]:
total_time_dict = 0
for i in ids:
    start = time.time()
    inventory.get_laptop_from_id_fast(i)
    end = time.time()
    total_time_dict += end - start

print(total_time_dict)

0.0063893795013427734


From the result above using dictionary is faster than going through the list. 

## Two Laptop Promotion

In [22]:
class Inventory:
    def __init__(self, csv_filename):
        with open(csv_filename) as my_file:
            csv_file = csv.reader(my_file)
            laptops = list(csv_file)
            self.header = laptops[0]
            self.rows = laptops[1:]
            
        for i in range(len(self.rows)):
            self.rows[i][-1] = int(self.rows[i][-1])
        self.id_to_row = {}
        for row in rows:
            self.id_to_row[row[0]] = row[1:]
    def get_laptop_from_id(self, laptop_id):
        laptop_found = None
        for row in rows:
            if row[0] == laptop_id:
                laptop_found = row
        if laptop_found is None:
            return None
        else:
            return laptop_found
        
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        else:
            return None
        
    def check_promotion_dollars(self,dollars):
        for row in self.rows:
            if dollars == row[-1]:
                return True
        for row in self.rows:
            price_1 = row[-1]
            for row in self.rows:
                price_2 = row[-1]
                if (price_1 + price_2  ) == dollars:
                    return True
        return False
            

In [23]:
inventory = Inventory('./laptops.csv')

In [24]:
inventory.check_promotion_dollars(1000)

True

In [25]:
inventory.check_promotion_dollars(442)

False

## Optimizing Laptop Promotion

In [27]:
class Inventory:
    def __init__(self, csv_filename):
        with open(csv_filename) as my_file:
            csv_file = csv.reader(my_file)
            laptops = list(csv_file)
            self.header = laptops[0]
            self.rows = laptops[1:]
            
        for i in range(len(self.rows)):
            self.rows[i][-1] = int(self.rows[i][-1])
        self.id_to_row = {}
        self.prices = set()
        for row in rows:
            self.id_to_row[row[0]] = row[1:]
            self.prices.add(row[-1])
            
    def get_laptop_from_id(self, laptop_id):
        laptop_found = None
        for row in rows:
            if row[0] == laptop_id:
                laptop_found = row
        if laptop_found is None:
            return None
        else:
            return laptop_found
        
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        else:
            return None
        
    def check_promotion_dollars(self,dollars):
        for row in self.rows:
            if dollars == row[-1]:
                return True
        for row in self.rows:
            price_1 = row[-1]
            for row in self.rows:
                price_2 = row[-1]
                if (price_1 + price_2  ) == dollars:
                    return True
        return False
    
    def check_promotion_dollars_fast(self, dollars):
        for price in self.prices:
            if price == dollars:
                return True
        for price in self.prices:
            price_1 = price
            for price in self.prices:
                price_2 = price
                if (price_1 + price_2) == dollars:
                    return True
        return False

In [28]:
inventory = Inventory('./laptops.csv')

In [29]:
inventory.check_promotion_dollars(1000)

True

In [30]:
inventory.check_promotion_dollars(442)

False

## Comparing Promotion Function

In [34]:
prices = [str(random.randint(100, 5000)) for i in range(100)]

In [35]:
total_time_no_set  = 0
for price in prices:
    start = time.time()
    inventory.check_promotion_dollars(price)
    end = time.time()
    total_time_no_set += end - start

print(total_time_no_set)
    

29.92643642425537


In [38]:
total_time_set = 0
for price in prices:
    start = time.time()
    inventory.check_promotion_dollars_fast(price)
    end = time.time()
    total_time_set += end - start

print(total_time_set)

8.451507568359375


Again, using set increase the speed by almost 4 times. 

## Finding Laptops Within a Budget

In [81]:
class Inventory:
    def __init__(self, csv_filename):
        with open(csv_filename) as my_file:
            csv_file = csv.reader(my_file)
            laptops = list(csv_file)
            self.header = laptops[0]
            self.rows = laptops[1:]
            
        for i in range(len(self.rows)):
            self.rows[i][-1] = int(self.rows[i][-1])
        
        self.id_to_row = {}
        self.prices = set()
        self.rows_by_price = sorted(rows, key = lambda x: x[-1])

        for i in range(len(self.rows_by_price)):
            self.rows_by_price[i][-1] = int(self.rows_by_price[i][-1])
            
        for row in rows:
            self.id_to_row[row[0]] = row[1:]
            self.prices.add(row[-1])
        
        
        
        
    def get_laptop_from_id(self, laptcop_id):
        laptop_found = None
        for row in rows:
            if row[0] == laptop_id:
                laptop_found = row
        if laptop_found is yNone:
            return None
        else:
            return laptop_found
        
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        else:
            return None
        
    def check_promotion_dollars(self,dollars):
        for row in self.rows:
            if dollars == row[-1]:
                return True
        for row in self.rows:
            price_1 = row[-1]
            for row in self.rows:
                price_2 = row[-1]
                if (price_1 + price_2  ) == dollars:
                    return True
        return False
    
    def check_promotion_dollars_fast(self, dollars):
        for price in self.prices:
            if price == dollars:
                return True
        for price in self.prices:
            price_1 = price
            for price in self.prices:
                price_2 = price
                if (price_1 + price_2) == dollars:
                    return True
        return False
    
    def find_first_laptop_more_expensive(self, target_price): 
        range_start = 0                                   
        range_end = len(self.rows_by_price) - 1                   
        while range_start < range_end:
            range_middle = (range_end + range_start) // 2  
            price = self.rows_by_price[range_middle][-1]
            if price > target_price:
                range_end = range_middle
            else:
                range_start = range_middle + 1
        if self.rows_by_price[range_start][-1] <= target_price:                  
            return -1                                   
        return range_start

In [82]:
inventory = Inventory('./laptops.csv')

In [83]:
inventory.find_first_laptop_more_expensive(1000)

683

In [79]:
len(rows)

1303

In [None]:
i