In [1]:
#1. Counting words and their frequencies

import re
from collections import Counter

def count_words(path, top_n=20):
    """Print the total word count and the most frequent words of a text file.

    A word is any run of ASCII letters, digits, hyphens and apostrophes.
    Counting is case-insensitive because every word is upper-cased first.

    Args:
        path: Path of the text file to read; it is decoded as UTF-8.
        top_n: How many of the most frequent words to list (default 20).

    Returns:
        None. The report is written to standard output.
    """
    # Read everything up front so the file is closed before the tallying starts.
    with open(path, encoding='utf-8') as file:
        text = file.read()

    all_words = [word.upper() for word in re.findall(r"[0-9a-zA-Z-']+", text)]
    print('\nTotal Words:', len(all_words))

    # Counter tallies an iterable directly; no manual increment loop is needed.
    word_counts = Counter(all_words)

    print('\nTop {} Words:'.format(top_n))
    for word, count in word_counts.most_common(top_n):
        print(word, '\t', count)
            
# Demo run: reports word statistics for 'shakespeare.txt', a path that is
# resolved relative to the current working directory.
if __name__ == '__main__':
    count_words('shakespeare.txt')


Total Words: 850474

Top 20 Words:
THE 	 26554
AND 	 25002
I 	 20224
TO 	 18230
OF 	 16650
A 	 13930
YOU 	 12846
MY 	 11669
IN 	 10777
THAT 	 10406
IS 	 8669
NOT 	 7892
WITH 	 7376
ME 	 7201
IT 	 7185
FOR 	 7131
HIS 	 6533
BE 	 6390
THIS 	 6247
YOUR 	 6243


In [5]:
#2. Merging CSV Files

import csv

def merge_csv(csv_list, output_path):
    """Merge several CSV files into one file whose header is the union of theirs.

    Columns appear in the order they are first seen across ``csv_list``. A row
    that lacks a column gets an empty string there (the ``csv.DictWriter``
    default).

    Args:
        csv_list: Paths of the CSV files to merge, in output order. The first
            row of each file is taken as its header; completely empty files
            contribute nothing.
        output_path: Path of the merged CSV file; it is overwritten if present.

    Raises:
        ValueError: Propagated from ``csv.DictWriter`` when a data row holds
            more values than its header has columns.
    """
    # The paths are walked twice below, so materialize one-shot iterables.
    csv_list = list(csv_list)

    # First pass: collect the union of all headers, keeping first-seen order.
    fieldnames = []
    for file in csv_list:
        # newline='' lets the csv module handle line endings itself, which
        # keeps line breaks embedded in quoted fields intact.
        with open(file, 'r', newline='') as input_csv:
            header = csv.DictReader(input_csv).fieldnames
        # An empty file has no header row, in which case fieldnames is None.
        for name in header or ():
            if name not in fieldnames:
                fieldnames.append(name)

    # Second pass: stream every file's rows into the merged output.
    with open(output_path, 'w', newline='') as output_csv:
        writer = csv.DictWriter(output_csv, fieldnames=fieldnames)
        writer.writeheader()
        for file in csv_list:
            with open(file, 'r', newline='') as input_csv:
                writer.writerows(csv.DictReader(input_csv))
        
# Demo run: merges class1.csv and class2.csv into all_students.csv; all three
# paths are resolved relative to the current working directory.
if __name__ == '__main__':
    merge_csv(['class1.csv', 'class2.csv'], 'all_students.csv')

In [3]:
#3. Save a Dictionary

import pickle

def save_dict(dict_to_save, file_path):
    """Pickle ``dict_to_save`` into the file at ``file_path``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(file_path, 'wb') as sink:
        pickler = pickle.Pickler(sink)
        pickler.dump(dict_to_save)
    
def load_dict(file_path):
    """Return the object unpickled from the file at ``file_path``.

    Caution: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    with open(file_path, 'rb') as source:
        unpickler = pickle.Unpickler(source)
        return unpickler.load()

# Demo run: round-trips a small dict through 'test_dict.pickle' (created in the
# current working directory) and prints what was read back.
if __name__ == '__main__':
    test_dict = {1: 'a', 2: 'b', 3: 'c'}
    save_dict(test_dict, 'test_dict.pickle')
    recovered = load_dict('test_dict.pickle')
    print(recovered)

{1: 'a', 2: 'b', 3: 'c'}


In [4]:
#4. ZIP all files with matching extensions

import os
from zipfile import ZipFile

def zip_all(search_dir, extension_list, output_path):
    """Archive every file under a directory tree that has one of the given extensions.

    Entries are stored under their path relative to ``search_dir``.

    Args:
        search_dir: Root directory; it is walked recursively.
        extension_list: Extensions to include, each with its leading dot
            (e.g. ``['.jpg', '.txt']``). Matching ignores case on both sides.
            A single string is treated as one extension.
        output_path: Path of the ZIP archive to create; overwritten if present.
    """
    # A bare string would otherwise be matched character by character.
    if isinstance(extension_list, str):
        extension_list = [extension_list]
    # File extensions are lower-cased below, so normalize the wanted ones too.
    wanted = {ext.lower() for ext in extension_list}
    archive_path = os.path.abspath(output_path)

    with ZipFile(output_path, 'w') as output_zip:
        for root, _dirs, files in os.walk(search_dir):
            rel_path = os.path.relpath(root, search_dir)
            for file in files:
                full_path = os.path.join(root, file)
                # Never add the archive to itself when it lives in search_dir.
                if os.path.abspath(full_path) == archive_path:
                    continue
                if os.path.splitext(file)[1].lower() in wanted:
                    output_zip.write(full_path,
                                     arcname=os.path.join(rel_path, file))
                    
# Demo run: archives the .jpg and .txt files found under my_stuff into
# my_stuff.zip. The search directory is written with a Windows-style backslash.
if __name__ == '__main__':
    zip_all('.\\my_stuff', ['.jpg','.txt'], 'my_stuff.zip')

In [6]:
def index_all(search_list, item):
    """Find every position of ``item`` in an arbitrarily nested list.

    Each hit is reported as an index path: the list of subscripts that leads
    from ``search_list`` down to the matching element. An element equal to
    ``item`` is reported as a hit and is not searched any deeper; only
    elements that are ``list`` instances are descended into.

    Args:
        search_list: The (possibly nested) list to search.
        item: The value to look for, compared with ``==``.

    Returns:
        A list of index paths in traversal order; empty when nothing matches.
    """
    paths = []
    for position, element in enumerate(search_list):
        if element == item:
            paths.append([position])
            continue
        if isinstance(element, list):
            # Prefix each path found in the sub-list with this position.
            paths.extend([position] + tail for tail in index_all(element, item))
    return paths

# Demo run: searches a nested list first for a scalar and then for a whole
# sub-list, printing the index paths of every match.
if __name__ == '__main__':    
    example = [[[1, 2, 3], 2, [1, 3]], [1, 2, 3]]
    print(index_all(example, 2))
    print(index_all(example, [1, 2, 3]))

[[0, 0, 1], [0, 1], [1, 1]]
[[0, 0], [1]]
