<h1 style="color:mediumvioletred"> Deal with files 2 </h1>

In [6]:
import os
import csv
import sys
import random
import fileinput
from itertools import islice

<h3 style="color:mediumvioletred"> Remove words from file </h3>

In [None]:
def reservoir_sample(iterable, k, randrange=random.randrange, shuffle=random.shuffle):
    """Select *k* random elements from *iterable*. If number of items less then *k* then return all items in random order. """
    it = iter(iterable)
    if not (k > 0):
        raise ValueError("sample size must be positive")

    # Fill the reservoir
    sample = list(islice(it, k)) 
    shuffle(sample)
    for i, item in enumerate(it, start=k+1):
        j = randrange(i) # random [0..i)
        if j < k:
            # Replace item with gradually decreasing probability
            sample[j] = item 
    return sample

def delete_lines(filename, lines):
    """Delete *lines* from *filename*."""
    lines = set(lines) # for amortized O(1) lookup
    with fileinput.FileInput(filename, inplace=True, backup='.bak') as file:
        for line in file:
            if line not in lines:
                print(line, end='')
    # Remove backup if there is no exception
    os.unlink(filename + '.bak') 


In [None]:
k, filename = 10, './data/a_text.txt'

# Read the lines from the file into a list
with open(filename) as file:
    #lines1 = file.readlines()
    lines = [line.strip() for line in file]

In [22]:
print("lines:", lines)
# Get k random lines
random_lines = reservoir_sample(lines, k) 
print("random_lines", random_lines)

# If file is empty
#if not random_lines: 
    #sys.exit() # do nothing, exit immediately

if not random_lines: # file is empty
    print("File is empty. Nothing to delete.")
else:
    # Print random lines
    print("\n".join(map(str.strip, random_lines))) 
    # Delete them from the file
    delete_lines(filename, random_lines) 

lines: ['This is line 1.', 'Hello, world!', 'Python is awesome.', 'Line number 4.', 'I love programming.', 'Another line here.', 'Sample text.', 'Line number 8.', 'Testing the file.', 'Final line in the file.', '']
lines ['This is line 1.', 'Hello, world!', 'Python is awesome.', 'Line number 4.', 'I love programming.', 'Another line here.', 'Sample text.', 'Line number 8.', 'Testing the file.', 'Final line in the file.', '']
random_lines ['', 'Line number 4.', 'Sample text.', 'Another line here.', 'Hello, world!', 'Testing the file.', 'Python is awesome.', 'I love programming.', 'Line number 8.', 'Final line in the file.']

Line number 4.
Sample text.
Another line here.
Hello, world!
Testing the file.
Python is awesome.
I love programming.
Line number 8.
Final line in the file.


<h3 style="color:mediumvioletred"> Count occurrances in a file </h3>

In [23]:
count = 0
word_list = []
counters = []
with open("./data/some_words.txt", 'r') as fp:
    lines = fp.readlines()
    print(lines)
    lines_1 = [element.strip() for element in lines]
    print(lines_1)
    lines_2 = list(set(lines_1))
    print(lines_2)
    for w in lines_2:
        for l in lines_1:
            if(l==w):
                count=count+1
        print("Occurrences of {} : {}".format(w,count))
        count = 0        

['the big bang\n', 'the pillow feels soft\n', 'odd one out\n', 'press conference\n', 'show me the money\n', 'time to go home\n', 'that was easy\n', 'hangman is cool\n', 'zoologist\n', 'quadruplets\n', 'the sky is blue']
['the big bang', 'the pillow feels soft', 'odd one out', 'press conference', 'show me the money', 'time to go home', 'that was easy', 'hangman is cool', 'zoologist', 'quadruplets', 'the sky is blue']
['press conference', 'zoologist', 'the big bang', 'odd one out', 'hangman is cool', 'time to go home', 'the sky is blue', 'that was easy', 'the pillow feels soft', 'show me the money', 'quadruplets']
Occurrences of press conference : 1
Occurrences of zoologist : 1
Occurrences of the big bang : 1
Occurrences of odd one out : 1
Occurrences of hangman is cool : 1
Occurrences of time to go home : 1
Occurrences of the sky is blue : 1
Occurrences of that was easy : 1
Occurrences of the pillow feels soft : 1
Occurrences of show me the money : 1
Occurrences of quadruplets : 1


<h3 style="color:mediumvioletred"> Write on a file </h3>

In [5]:
""" Write """

f = open("./test_to_writein.txt", "w+")
f.write("testing 0\n")
f.write("testing 1\n")
pos = f.tell()
f.write("testing 2\n")

f.seek(pos, os.SEEK_SET)
f.truncate(pos)
#f.write("foo\n")

4

<h3 style="color:mediumvioletred"> Notes: </h3>
<div style="margin-top: -28px;">

Use the "with" statement!    
It ensures that the file is properly closed after its suite is executed, even if an exception occurs during the execution.     
Concept of Context manager => implemented by the file object returned by the open() function.    

Opening a file using the "with" statement, Python creates a context manager that takes care of opening and closing the file automatically.       
When the block of code inside the "with" statement finishes executing, the context manager is responsible for closing the file,
regardless of whether the code finishes successfully or an exception is raised.    
</div>

<h3 style="color:mediumvioletred"> Append more data </h3>

In [6]:
with open("./data/test_to_writein.txt", "a") as f:
    f.write("testing 2\n")

<h2 style="color:mediumvioletred"> <u> CSV </u></h2>

In [5]:
""" From text to csv """
with open('./data/text_use_ex.txt', 'r') as in_file:
    stripped = (line.strip() for line in in_file)
    lines = (line.split(",") for line in stripped if line)
    with open('./data/def_list_questions.csv', 'w') as out_file:
        writer = csv.writer(out_file)
        writer.writerows(lines)

In [27]:
""" Open the CSV file using a file object.
N.B. 
Use csv not pandas! 
"""
with open('./data/names_jobs.csv', 'r') as file:
    # Create a CSV reader object
    reader = csv.reader(file)   
    # Skip the first row, let's say that the csv file it is not a header row, otherwise comment
    next(reader, None)           
    
    for row in reader:
        print(row)

['1', 'Michael', 'Smith', 'michael.smith@example.com', '555-1234', '30', 'New York', 'Engineer', '80000']
['2', 'Emily', 'Johnson', 'emily.johnson@example.com', '555-5678', '25', 'Los Angeles', 'Teacher', '50000']
['3', 'Andrew', 'Williams', 'andrew.williams@example.com', '555-9012', '35', 'Chicago', 'Doctor', '120000']
['4', 'Olivia', 'Brown', 'olivia.brown@example.com', '555-3456', '28', 'San Francisco', 'Designer', '60000']
['5', 'William', 'Davis', 'william.davis@example.com', '555-7890', '32', 'Miami', 'Developer', '90000']
['6', 'Sophia', 'Jones', 'sophia.jones@example.com', '555-1111', '27', 'Houston', 'Marketing Manager', '75000']
['7', 'Benjamin', 'Miller', 'benjamin.miller@example.com', '555-2222', '31', 'Dallas', 'Accountant', '65000']
['8', 'Ava', 'Taylor', 'ava.taylor@example.com', '555-3333', '29', 'Seattle', 'Software Engineer', '85000']
['9', 'Alexander', 'Anderson', 'alexander.anderson@example.com', '555-4444', '33', 'Austin', 'Data Analyst', '55000']
['10', 'Mia', 'Th

In [3]:
%%script echo Skipping, to not show the path.
""" Build a file path relative to the current working directory. """
current_dir = os.getcwd()                                   
file_path = os.path.join(current_dir, 'MNIST_train.txt')   
print(current_dir)
print(file_path)

Skipping, to not show the path.
