In [1]:
import csv
with open('laptops.csv', 'r', encoding='UTF-8') as csvfile:
    spamreader = csv.reader(csvfile)
    rows = list(spamreader)
    header = rows.pop(0)
#Test to see there's data in the csv file
print(rows[:5])

import time
def timit(func, *args):
    start = time.time()
    func(*args)
    end = time.time()
    elapsed = end - start
    print(f"Function {func.__name__} took {elapsed} seconds with args {args}")

[['6571244', 'Apple', 'MacBook Pro', 'Ultrabook', '13.3', 'IPS Panel Retina Display 2560x1600', 'Intel Core i5 2.3GHz', '8GB', '128GB SSD', 'Intel Iris Plus Graphics 640', 'macOS', '1.37kg', '1339'], ['7287764', 'Apple', 'Macbook Air', 'Ultrabook', '13.3', '1440x900', 'Intel Core i5 1.8GHz', '8GB', '128GB Flash Storage', 'Intel HD Graphics 6000', 'macOS', '1.34kg', '898'], ['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', '575'], ['9722156', 'Apple', 'MacBook Pro', 'Ultrabook', '15.4', 'IPS Panel Retina Display 2880x1800', 'Intel Core i7 2.7GHz', '16GB', '512GB SSD', 'AMD Radeon Pro 455', 'macOS', '1.83kg', '2537'], ['8550527', 'Apple', 'MacBook Pro', 'Ultrabook', '13.3', 'IPS Panel Retina Display 2560x1600', 'Intel Core i5 3.1GHz', '8GB', '256GB SSD', 'Intel Iris Plus Graphics 650', 'macOS', '1.37kg', '1803']]


In [2]:
class Inventory: # Initial, barebones iteration
    def __init__(self, csv_filename):
        with open(csv_filename, 'r', encoding='UTF-8') as csvfile:
            self.rows = list(csv.reader(csvfile))
            self.header = self.rows.pop(0)

In [3]:
inventory = Inventory('laptops.csv')
print(inventory.header)
print(len(inventory.rows))

['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']
1303


Adding a way to look up data in the csv file

In [4]:
class Inventory: # First modified iteration
    def __init__(self, csv_filename):
        with open(csv_filename, 'r', encoding='UTF-8') as csvfile:
            self.rows = list(csv.reader(csvfile))
            self.header = self.rows.pop(0)
    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None

In [5]:
# Test latest inventory class
inventory = Inventory('laptops.csv')
assert inventory.get_laptop_from_id('3362737')
assert inventory.get_laptop_from_id('3362736') is None
timit(inventory.get_laptop_from_id, '3362736')

Function get_laptop_from_id took 0.00010561943054199219 seconds with args ('3362736',)


Seeking to optimize lookup from O(N) to O(1)

In [6]:
class Inventory: # Second modified iteration
    def __init__(self, csv_filename):
        with open(csv_filename, 'r', encoding='UTF-8') as csvfile:
            self.rows = list(csv.reader(csvfile))
            self.header = self.rows.pop(0)
        # Pre-process data into dictionary for easy lookup
        self.id_to_row = {row[0]: row[1:] for row in self.rows}
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row.keys():
            return self.id_to_row[laptop_id]
        else:
            return None
    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None

In [11]:
# Test latest inventory class
inventory = Inventory('laptops.csv')
assert inventory.get_laptop_from_id_fast('3362737')
assert inventory.get_laptop_from_id_fast('3362736') is None
timit(inventory.get_laptop_from_id_fast, '3362736')

Function get_laptop_from_id_fast took 1.1920928955078125e-06 seconds with args ('3362736',)


In [8]:
import time
import random

def timeit(funct, *args):
    start = time.time()
    funct(*args)
    end = time.time()
    return end - start

rand_values = [str(random.randint(10**6, 10**7-1)) for _ in range(10**5)]
inventory = Inventory('laptops.csv')
total_time_no_dict = 0
for id in rand_values:
    secs = timeit(inventory.get_laptop_from_id, id)
    total_time_no_dict += secs
total_time_dict = 0
for id in rand_values:
    secs = timeit(inventory.get_laptop_from_id_fast, id)
    total_time_dict += secs
print(total_time_no_dict, total_time_dict)

10.777134656906128 0.053897857666015625


In [9]:
class Inventory: # Second modified iteration
    def __init__(self, csv_filename):
        with open(csv_filename, 'r', encoding='UTF-8') as csvfile:
            self.rows = list(csv.reader(csvfile))
            self.header = self.rows.pop(0)
        # Pre-process data into dictionary for easy lookup
        self.id_to_row = {row[0]: row[1:] for row in self.rows}
    
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row.keys():
            return self.id_to_row[laptop_id]
        else:
            return None
    
    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
    
    def check_promotion_dollars(self, dollars):
        price_idx = -1
        for row in self.rows:
            if int(row[price_idx]) == dollars:
                return True
        for i in range(len(self.rows)):
            for n in range(i, len(self.rows)):
                p_total = int(self.rows[i][price_idx]) + int(self.rows[n][price_idx])
                if p_total == dollars:
                    return True
        return False

In [10]:
# Test latest inventory class
inventory = Inventory('laptops.csv')
assert inventory.check_promotion_dollars(1000)
assert not inventory.check_promotion_dollars(442)
timit(inventory.check_promotion_dollars, 442)

Function check_promotion_dollars took 0.5663290023803711 seconds with args (442,)


Lower time complexity of all queries by pre-processing the price combinations during __init__.

In [14]:
class Inventory: # Third modified iteration
    def __init__(self, csv_filename):
        with open(csv_filename, 'r', encoding='UTF-8') as csvfile:
            self.rows = list(csv.reader(csvfile))
            self.header = self.rows.pop(0)
        # Pre-process data into dictionary for easy lookup
        self.id_to_row = {row[0]: row[1:] for row in self.rows}
        self.prices = set()
        self.price_idx = -1
        for row in self.rows:
            self.prices.add(int(row[self.price_idx]))
    
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row.keys():
            return self.id_to_row[laptop_id]
        else:
            return None
    
    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
    
    def check_promotion_dollars(self, dollars):
        price_idx = -1
        for row in self.rows:
            if int(row[price_idx]) == dollars:
                return True
        for i in range(len(self.rows)):
            for n in range(i, len(self.rows)):
                p_total = int(self.rows[i][price_idx]) + int(self.rows[n][price_idx])
                if p_total == dollars:
                    return True
        return False
    
    def check_promotion_dollars_fast(self, dollars):
        if dollars in self.prices:
            return True
        for row in self.rows:
            price1 = int(row[self.price_idx])
            price2 = dollars - price1
            if price2 in self.prices:
                return True
        return False

In [15]:
# Test latest inventory class
inventory = Inventory('laptops.csv')
assert inventory.check_promotion_dollars_fast(1000)
assert not inventory.check_promotion_dollars_fast(442)
timit(inventory.check_promotion_dollars_fast, 442)

Function check_promotion_dollars_fast took 0.0005924701690673828 seconds with args (442,)


In [16]:
prices = [random.randint(100, 5000) for _ in range(100)]
inventory = Inventory('laptops.csv')
total_time_no_set = 0
for price in prices:
    total_time_no_set += timeit(inventory.check_promotion_dollars, price)
total_time_set = 0
for price in prices:
    total_time_set += timeit(inventory.check_promotion_dollars_fast, price)
print(f"Time without a set: {total_time_no_set}.\nTime with a set: {total_time_set}.")

Time without a set: 3.364323377609253.
Time with a set: 0.002518177032470703.


Implement sorted prices to use binary search in looking for laptops under a certain price

In [48]:
class Inventory: # Fourth modified iteration
    def __init__(self, csv_filename):
        with open(csv_filename, 'r', encoding='UTF-8') as csvfile:
            self.rows = list(csv.reader(csvfile))
            self.header = self.rows.pop(0)
        # Pre-process data into dictionary for easy lookup
        self.id_to_row = {row[0]: row[1:] for row in self.rows}
        self.prices = set()
        self.price_idx = -1
        for row in self.rows:
            self.prices.add(int(row[self.price_idx]))
        def row_price(row):
            return int(row[self.price_idx])
        self.rows_by_price = sorted(self.rows, key=row_price)
    
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row.keys():
            return self.id_to_row[laptop_id]
        else:
            return None
    
    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
    
    def check_promotion_dollars(self, dollars):
        price_idx = -1
        for row in self.rows:
            if int(row[price_idx]) == dollars:
                return True
        for i in range(len(self.rows)):
            for n in range(i, len(self.rows)):
                p_total = int(self.rows[i][price_idx]) + int(self.rows[n][price_idx])
                if p_total == dollars:
                    return True
        return False
    
    def check_promotion_dollars_fast(self, dollars):
        if dollars in self.prices:
            return True
        for row in self.rows:
            price1 = int(row[self.price_idx])
            price2 = dollars - price1
            if price2 in self.prices:
                return True
        return False
    
    def find_first_laptop_more_expensive(self, target_price):
        range_start = 0
        range_end = len(self.rows_by_price) - 1
        while range_start < range_end:
            range_middle = (range_end + range_start) // 2
            price1 = int(self.rows_by_price[range_middle-1][self.price_idx])
            price2 = int(self.rows_by_price[range_middle][self.price_idx])
            if price1 <= target_price and price2 > target_price:
                return range_middle
            elif price1 < target_price and price2 <= target_price:
                range_start = range_middle + 1
            else:
                range_end = range_middle
        price = int(self.rows_by_price[range_start][self.price_idx])
        if price < target_price:
            return -1
        return range_start

In [49]:
# Test latest inventory class
inventory = Inventory('laptops.csv')
print(inventory.find_first_laptop_more_expensive(1000))
assert inventory.find_first_laptop_more_expensive(1000) == 683
assert inventory.find_first_laptop_more_expensive(10000) == -1
timit(inventory.find_first_laptop_more_expensive, 10000)

683
Function find_first_laptop_more_expensive took 1.33514404296875e-05 seconds with args (10000,)


In [44]:
print(inventory.rows_by_price[683])
print(inventory.rows_by_price[684])
print((1301+1302)//2)

['8747948', 'Lenovo', 'ThinkPad T460', 'Notebook', '14', '1366x768', 'Intel Core i5 6200U 2.3GHz', '4GB', '508GB Hybrid', 'Intel HD Graphics 520', 'Windows 7', '1.70kg', '1002']
['5550925', 'Dell', 'Latitude 5580', 'Notebook', '15.6', '1366x768', 'Intel Core i5 7300U 2.6GHz', '8GB', '500GB HDD', 'Intel HD Graphics 620', 'Windows 10', '1.9kg', '1008']
1301


FROM DATAQUEST: If you want to push this project further, we suggest that you think about the following queries:

Imagine that we extend our budget query to take as input a range of prices, min_price and max_price, rather than a single price. Write a query that finds all laptops whose price is in the given range.
Sometimes, a customer wants a laptop with some characteristics such as, for instance, 8GB or RAM and a 256GB hard drive. It would be interesting for those customers to provide a way to find the cheapest laptop that matches the desired characteristics. For simplicity, focus only on the amount of RAM and hard drive capacity. You might need to convert those values to integers rather than using strings.