In [1]:
import itertools


# Peek at the first 15 lines of the file (header row included).
# islice stops cleanly when the file has fewer than 15 lines, whereas calling
# next(f) a fixed number of times would raise StopIteration in that case.
with open("cars.csv") as f:
    for line in itertools.islice(f, 15):
        print(line, end="")  # each line already ends with its own newline

make,model
ACURA,ILX
ACURA,MDX
ACURA,RDX
ACURA,RLX
ACURA,TL
ACURA,TSX
ALFA ROMEO,4C
ALFA ROMEO,GIULIETTA
APRILIA,CAPONORD 1200
APRILIA,RSV4 FACTORY APRC ABS
APRILIA,RSV4 R APRC ABS
APRILIA,SHIVER 750
ARCTIC CAT,1000 XT
ARCTIC CAT,500 XT


In [2]:
# Count how many models exist for each car make (essentially a "group by").

from collections import defaultdict

makes = defaultdict(int)

with open("cars.csv") as f:
    headers = next(f, None)  # skip the header row; None when the file is empty
    for row in f:
        line = row.rstrip("\n")
        if not line:
            continue  # tolerate blank lines, e.g. a trailing one at end of file
        # Split on the first comma only, so a model name that itself contains
        # a comma no longer breaks the two-way unpacking.
        make, sep, model = line.partition(",")
        if not sep:
            # Keep failing loudly (same exception type as before) on rows that
            # do not have the "make,model" shape.
            raise ValueError(f"malformed row, expected 'make,model': {row!r}")
        makes[make] += 1

In [3]:
# Display the per-make tallies currently held in `makes`.
makes

defaultdict(int,
            {'ACURA': 6,
             'ALFA ROMEO': 2,
             'APRILIA': 4,
             'ARCTIC CAT': 96,
             'ARGO': 4,
             'ASTON MARTIN': 5,
             'AUDI': 27,
             'BENTLEY': 2,
             'BLUE BIRD': 1,
             'BMW': 86,
             'BUGATTI': 1,
             'BUICK': 5,
             'CADILLAC': 7,
             'CAN-AM': 61,
             'CHEVROLET': 33,
             'CHRYSLER': 2,
             'DODGE': 7,
             'DUCATI': 4,
             'FERRARI': 6,
             'FIAT': 2,
             'FORD': 34,
             'FREIGHTLINER': 7,
             'GMC': 12,
             'HARLEY DAVIDSON': 29,
             'HINO': 7,
             'HONDA': 91,
             'HUSABERG': 4,
             'HUSQVARNA': 9,
             'HYUNDAI': 13,
             'INDIAN': 3,
             'INFINITI': 8,
             'JAGUAR': 9,
             'JEEP': 5,
             'JOHN DEERE': 19,
             'KAWASAKI': 59,
             'KENWORTH': 1

In [4]:
# Without a key function, groupby groups runs of consecutive equal elements.
# Materializing the outer iterator gives (key, group-iterator) pairs.
data = (1, 2, 2, 2, 3)
[*itertools.groupby(data)]

[(1, <itertools._grouper at 0x107c10190>),
 (2, <itertools._grouper at 0x107c10520>),
 (3, <itertools._grouper at 0x107c10550>)]

In [5]:
# Equal values that are not adjacent end up in separate groups:
# both 1 and 3 appear twice as a key in the printed result.
data = (1, 2, 2, 2, 3, 1, 1, 3, 3, 3, 3)
it = itertools.groupby(data)
for key, run in it:
    run_items = list(run)
    print(key, run_items)

1 [1]
2 [2, 2, 2]
3 [3]
1 [1, 1]
3 [3, 3, 3, 3]


In [6]:
# Group (number, text) pairs by their first field.
data = (
    (1, "abc"), (1, "bcd"),
    (2, "pyt"), (2, "yth"), (2, "tho"),
    (3, "hon"),
)
it = itertools.groupby(data, key=lambda pair: pair[0])
for number, pairs in it:
    print(number, list(pairs))

1 [(1, 'abc'), (1, 'bcd')]
2 [(2, 'pyt'), (2, 'yth'), (2, 'tho')]
3 [(3, 'hon')]


In [7]:
def gen_groups():
    """Yield (key, i) tuples for keys 1 through 3.

    For every key the indices 0, 1, 2 are produced twice back to back, so
    each key contributes six consecutive tuples.
    """
    for key in range(1, 4):
        for _ in range(2):
            yield from ((key, i) for i in range(3))
    

In [8]:
# Materialize a fresh generator to show every tuple it yields.
list(gen_groups())

[(1, 0),
 (1, 1),
 (1, 2),
 (1, 0),
 (1, 1),
 (1, 2),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 0),
 (2, 1),
 (2, 2),
 (3, 0),
 (3, 1),
 (3, 2),
 (3, 0),
 (3, 1),
 (3, 2)]

In [9]:
# Wrap a fresh generator in groupby, keyed on the first element of each tuple.
g = gen_groups()
groups = itertools.groupby(g, key=lambda pair: pair[0])

In [10]:
# Print each key next to its group iterator object, without consuming the group.
for key, members in groups:
    print(key, members)

1 <itertools._grouper object at 0x107c12ad0>
2 <itertools._grouper object at 0x107c121a0>
3 <itertools._grouper object at 0x107c12ad0>


In [11]:
# Iterators get consumed as usual; the sub-iterators are consumed as well.
list(g), list(groups)

([], [])

In [12]:
# Start over with a fresh generator and, this time, materialize every group
# while it is the current one so its members are actually shown.
g = gen_groups()
groups = itertools.groupby(g, key=lambda pair: pair[0])
for key, members in groups:
    print(key, [*members])

1 [(1, 0), (1, 1), (1, 2), (1, 0), (1, 1), (1, 2)]
2 [(2, 0), (2, 1), (2, 2), (2, 0), (2, 1), (2, 2)]
3 [(3, 0), (3, 1), (3, 2), (3, 0), (3, 1), (3, 2)]


In [13]:
# Group the data rows by make and show every raw line belonging to each make.
with open("cars.csv") as f:
    headers = next(f)  # step past the header row
    make_groups = itertools.groupby(
        f,
        key=lambda line: line.replace("\n", "").partition(",")[0],
    )
    for make, models in make_groups:
        model_lines = list(models)
        print(make, model_lines, end="\n")


ACURA ['ACURA,ILX\n', 'ACURA,MDX\n', 'ACURA,RDX\n', 'ACURA,RLX\n', 'ACURA,TL\n', 'ACURA,TSX\n']
ALFA ROMEO ['ALFA ROMEO,4C\n', 'ALFA ROMEO,GIULIETTA\n']
APRILIA ['APRILIA,CAPONORD 1200\n', 'APRILIA,RSV4 FACTORY APRC ABS\n', 'APRILIA,RSV4 R APRC ABS\n', 'APRILIA,SHIVER 750\n']
ARCTIC CAT ['ARCTIC CAT,1000 XT\n', 'ARCTIC CAT,500 XT\n', 'ARCTIC CAT,550 XT\n', 'ARCTIC CAT,700 LTD\n', 'ARCTIC CAT,700 SUPER DUTY DIESEL\n', 'ARCTIC CAT,700 XT\n', 'ARCTIC CAT,90 2X4 4-STROKE\n', 'ARCTIC CAT,BEARCAT 570\n', 'ARCTIC CAT,BEARCAT 570 XT\n', 'ARCTIC CAT,BEARCAT Z1 XT\n', 'ARCTIC CAT,BEARCAT Z1 XT GS\n', 'ARCTIC CAT,BEARCAT Z1 XT LIMITED\n', 'ARCTIC CAT,DVX 300\n', 'ARCTIC CAT,DVX 90\n', 'ARCTIC CAT,F5\n', 'ARCTIC CAT,F570\n', 'ARCTIC CAT,M 6000 SNO PRO 153\n', 'ARCTIC CAT,M 8000 153\n', 'ARCTIC CAT,M 8000 HCR 153\n', 'ARCTIC CAT,M 8000 LIMITED 153\n', 'ARCTIC CAT,M 8000 LIMITED 162\n', 'ARCTIC CAT,M 8000 LIMITED ES 153\n', 'ARCTIC CAT,M 8000 LIMITED ES 162\n', 'ARCTIC CAT,M 8000 SNO PRO 153\n', 'AR

In [14]:
# Count the models per make by grouping consecutive rows that share a make.
# NOTE: groupby only merges adjacent equal keys, so this assumes the rows of
# each make are contiguous in the file.
with open("cars.csv") as f:
    headers = next(f, None)  # skip the header row; None when the file is empty
    make_groups = itertools.groupby(f, key=lambda line: line.partition(",")[0])
    for make, models in make_groups:
        # Count by iterating instead of building a throwaway list per group.
        print(make, sum(1 for _ in models))

ACURA 6
ALFA ROMEO 2
APRILIA 4
ARCTIC CAT 96
ARGO 4
ASTON MARTIN 5
AUDI 27
BENTLEY 2
BLUE BIRD 1
BMW 86
BUGATTI 1
BUICK 5
CADILLAC 7
CAN-AM 61
CHEVROLET 33
CHRYSLER 2
DODGE 7
DUCATI 4
FERRARI 6
FIAT 2
FORD 34
FREIGHTLINER 7
GMC 12
HARLEY DAVIDSON 29
HINO 7
HONDA 91
HUSABERG 4
HUSQVARNA 9
HYUNDAI 13
INDIAN 3
INFINITI 8
JAGUAR 9
JEEP 5
JOHN DEERE 19
KAWASAKI 59
KENWORTH 11
KIA 10
KTM 13
KUBOTA 4
KYMCO 28
LAMBORGHINI 2
LAND ROVER 6
LEXUS 14
LINCOLN 6
LOTUS 1
MACK 9
MASERATI 3
MAZDA 5
MCLAREN 2
MERCEDES-BENZ 60
MINI 3
MITSUBISHI 8
NISSAN 24
PEUGEOT 3
POLARIS 101
PORSCHE 4
RAM 6
RENAULT 4
ROLLS ROYCE 3
SCION 5
SEAT 3
SKI-DOO 67
SMART 1
SRT 1
SUBARU 10
SUZUKI 48
TESLA 2
TOYOTA 19
TRIUMPH 10
VESPA 4
VICTORY 14
VOLKSWAGEN 16
VOLVO 8
YAMAHA 110
