## RCS Python File Operations 2

# Binary Files

In [2]:
with open("myfile.bin", "wb") as f:
    f.write(b'\x30\x31\x09\x32\x20\x52\x43\x53\x0A\x51') # notice b prefix!!


In [3]:
with open("myfile.bin", "r") as f:
    lines=f.readlines()
    f.seek(0)
    text=f.read()
print(lines)
print(lines[0])
print(text)

['01\t2 RCS\n', 'Q']
01	2 RCS

01	2 RCS
Q


ASCII Codes
https://en.wikipedia.org/wiki/ASCII

In [4]:
with open("myfile.bin", "rb") as f:
    blines=f.readlines()

In [5]:
print(blines)

[b'01\t2 RCS\n', b'Q']


In [37]:
blines

[b'012 RCS']

## For more complicated binary writing and reading of binaries 
### pickle standard library is recommended

https://docs.python.org/3/library/pickle.html

In [6]:
import pickle

In [7]:
with open("myfile.bin", "wb") as f:
    myint = 42
    mystring = "Hello, RCS!"
    mylist = ["sun", "moon", "earth"]
    mydict = { "name": "Val", "job": "Teacher" }   
    pickle.dump(myint, f)
    pickle.dump(mystring, f)
    pickle.dump(mylist, f)
    pickle.dump(mydict, f)


In [8]:
with open("myfile.bin", "r", encoding=None) as f:
    mf=f.read()
mf

'€\x03K*.€\x03X\x0b\x00\x00\x00Hello, RCS!q\x00.€\x03]q\x00(X\x03\x00\x00\x00sunq\x01X\x04\x00\x00\x00moonq\x02X\x05\x00\x00\x00earthq\x03e.€\x03}q\x00(X\x04\x00\x00\x00nameq\x01X\x03\x00\x00\x00Valq\x02X\x03\x00\x00\x00jobq\x03X\x07\x00\x00\x00Teacherq\x04u.'

### Not very helpful is it? Better to use pickle to retrieve the data and "unpickle" it

In [9]:
with open("myfile.bin", "rb") as f:
    myint = pickle.load(f)
    mystring = pickle.load(f)
    mylist = pickle.load(f)
    mydict = pickle.load(f)  


In [11]:
# Recipe for opening a pickled file with unknown number of variables
with open('myfile.bin', "rb") as f:
    mylist = []
    while True:
        try:
            mylist.append(pickle.load(f))
        except EOFError:
            print("End of file reached!")
            break

End of file reached!


In [12]:
len(mylist)

4

In [13]:
mylist[3]

{'name': 'Val', 'job': 'Teacher'}

In [10]:
print(myint,mystring,mylist,mydict)

42 Hello, RCS! ['sun', 'moon', 'earth'] {'name': 'Val', 'job': 'Teacher'}


In [14]:
import os
print(os.getcwd()) # cwd - current working directory

C:\Users\val-p1\Github\RCS_Data_Analysis_Python_2019_May


In [19]:
mycwd = os.getcwd()
mycwd

'C:\\Users\\vsd\\Documents\\Github\\RCS_Python'

In [15]:
%pwd

'C:\\Users\\val-p1\\Github\\RCS_Data_Analysis_Python_2019_May'

In [16]:
!dir

 Volume in drive C is Windows
 Volume Serial Number is 5AA0-2A07

 Directory of C:\Users\val-p1\Github\RCS_Data_Analysis_Python_2019_May

05/21/2019  05:31 PM    <DIR>          .
05/21/2019  05:31 PM    <DIR>          ..
05/21/2019  05:30 PM    <DIR>          .ipynb_checkpoints
05/20/2019  05:58 PM               386 Calc.py
05/13/2019  04:26 PM         7,435,248 Data_Analysis_Python_Introduction.pdf
05/21/2019  03:27 PM             2,603 Functions Returning Values.ipynb
05/13/2019  04:26 PM             1,713 Git_Workflow.md
05/20/2019  04:20 PM               828 Homework #1.ipynb
05/20/2019  04:14 PM                85 Homework #1.py
05/21/2019  04:02 PM             2,595 Homework 2.ipynb
05/20/2019  05:01 PM               282 hw.py
05/18/2019  02:01 PM               637 HW1.ipynb
05/13/2019  04:26 PM    <DIR>          img
05/20/2019  04:55 PM             2,991 Importing Your own libraries.ipynb
05/13/2019  04:26 PM               217 Jupyter with Python.md
05/13/2019  04:26 PM          

In [19]:
os.getlogin()

'val-p1'

In [20]:
os.getcwd()

'C:\\Users\\val-p1\\Github\\RCS_Data_Analysis_Python_2019_May'

In [21]:
os.rename('numbers.txt', 'bignumbers.txt')

In [22]:
myfiles = os.listdir()
myfiles[:5]

['.git',
 '.ipynb_checkpoints',
 'bignumbers.txt',
 'Calc.py',
 'Data_Analysis_Python_Introduction.pdf']

In [25]:
myfiles

['.git',
 '.ipynb_checkpoints',
 'bignumbers.txt',
 'Calc.py',
 'Data_Analysis_Python_Introduction.pdf',
 'Functions Returning Values.ipynb',
 'Git_Workflow.md',
 'Homework #1.ipynb',
 'Homework #1.py',
 'Homework 2.ipynb',
 'hw.py',
 'HW1.ipynb',
 'img',
 'Importing Your own libraries.ipynb',
 'Jupyter with Python.md',
 'LICENSE',
 'myfile.bin',
 'mylib.ipynb',
 'mylib.py',
 'numbers16_39_12.txt',
 'numbers2019-05-21.txt',
 'Python Classes in Class 20.05.2019.ipynb',
 'Python Classes.ipynb',
 'Python Cleaning Up Text Files.ipynb',
 'Python Comparison operators.md',
 'Python Conditional Execution Branching.md',
 'Python Data Structures Exercises.ipynb',
 'Python Dictionaries.ipynb',
 'Python Errors.ipynb',
 'Python Exercises.md',
 'Python File IO in class 21.05.2019.ipynb',
 'Python File Operations 2 Binary Files and Pickle in class 21.05.2019.ipynb',
 'Python Flow Control.ipynb',
 'Python Flow Control_in_class_18.05.2019.ipynb',
 'Python Functions.ipynb',
 'Python Functions.md',
 'Pyt

In [26]:

mytextfiles = [file for file in myfiles if '.txt' in file]
mytextfiles

['bignumbers.txt',
 'numbers16_39_12.txt',
 'numbers2019-05-21.txt',
 'somefile.txt',
 'sometext.txt',
 'squares.txt',
 'squares2.txt']

In [None]:
os

In [27]:
result = []
for file in mytextfiles:
    with open(file) as f:
        txt = f.read()
        result.append(len(txt))
        

In [30]:
filesizes = []
for file in mytextfiles:
    filesizes.append((file, os.path.getsize(file)))
filesizes

[('bignumbers.txt', 211),
 ('numbers16_39_12.txt', 167),
 ('numbers2019-05-21.txt', 167),
 ('somefile.txt', 69),
 ('sometext.txt', 135),
 ('squares.txt', 169),
 ('squares2.txt', 169)]

In [31]:
filedict = {}
for file in mytextfiles:
    filedict[file] = os.path.getsize(file)
filedict

{'bignumbers.txt': 211,
 'numbers16_39_12.txt': 167,
 'numbers2019-05-21.txt': 167,
 'somefile.txt': 69,
 'sometext.txt': 135,
 'squares.txt': 169,
 'squares2.txt': 169}

In [33]:
print('mytextfiles is a ', type(mytextfiles))
# one line dictionary comprehension
fdict = {f:os.path.getsize(f) for f in mytextfiles}
fdict

mytextfiles is a  <class 'list'>


{'bignumbers.txt': 211,
 'numbers16_39_12.txt': 167,
 'numbers2019-05-21.txt': 167,
 'somefile.txt': 69,
 'sometext.txt': 135,
 'squares.txt': 169,
 'squares2.txt': 169}

In [28]:
result

[202, 158, 158, 66, 126, 159, 159]

In [18]:
os.listdir('C:\\')

['$Recycle.Bin',
 'Documents and Settings',
 'DRIVERS',
 'hiberfil.sys',
 'Intel',
 'pagefile.sys',
 'PerfLogs',
 'Program Files',
 'Program Files (x86)',
 'ProgramData',
 'Recovery',
 'swapfile.sys',
 'System Volume Information',
 'TVicPortPersonal',
 'Users',
 'Windows']

In [20]:
os.chdir("c:\\")
os.getcwd()


'c:\\'

In [21]:
os.chdir(mycwd)
os.getcwd()

'C:\\Users\\vsd\\Documents\\Github\\RCS_Python'

In [18]:
os.chdir("C:\\Users\\vsd\\Documents\\Github\\RCS_Python")
os.getcwd()

'C:\\Users\\vsd\\Documents\\Github\\RCS_Python'

	The os.path.join() function constructs a pathname out of one or more partial pathnames
   **Don’t fuss with slashes; always use os.path.join() and let Python do the right thing.**

In [23]:
os.path.join(os.getcwd(), "README.md")

'C:\\Users\\vsd\\Documents\\Github\\RCS_Python\\README.md'

In [39]:
os.path.join("C:\\Users\\vsd\\Documents\\Github\\RCS_Python","README.md")

'C:\\Users\\vsd\\Documents\\Github\\RCS_Python\\README.md'

In [34]:
print(os.path.join(os.path.expanduser('~'), 'Documents', 'Github', 'RCS_Python', 'README.md')) 

C:\Users\val-p1\Documents\Github\RCS_Python\README.md


In [39]:
print(os.path.join(os.getcwd(),'Documents', 'Github', 'RCS_Python', 'README.md')) 

C:\Users\val-p1\Github\RCS_Data_Analysis_Python_2019_May\Documents\Github\RCS_Python\README.md


In [18]:
newpath=os.path.join(os.path.expanduser('~'), 'Documents', 'Github')
newpath

'C:\\Users\\Val\\Documents\\Github'

In [19]:
print(newpath) ## Aha pretty print!!

C:\Users\Val\Documents\Github


In [26]:
mypath=os.getcwd()
mypath

'C:\\Users\\vsd\\Documents\\Github\\RCS_Python'

In [27]:
mysplit = os.path.split(mypath)
mysplit

('C:\\Users\\vsd\\Documents\\Github', 'RCS_Python')

In [28]:
mydir, myfname = os.path.split(mypath)
print(mydir,":",myfname)

C:\Users\vsd\Documents\Github : RCS_Python


The glob module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell, although results are returned in arbitrary order.

In [None]:
glob.

In [42]:
from glob import glob as gl
ifiles=gl('Python*.ipynb')
ifiles

['Python Classes in Class 20.05.2019.ipynb',
 'Python Classes.ipynb',
 'Python Cleaning Up Text Files.ipynb',
 'Python Data Structures Exercises.ipynb',
 'Python Dictionaries.ipynb',
 'Python Errors.ipynb',
 'Python File IO in class 21.05.2019.ipynb',
 'Python File Operations 2 Binary Files and Pickle in class 21.05.2019.ipynb',
 'Python Flow Control.ipynb',
 'Python Flow Control_in_class_18.05.2019.ipynb',
 'Python Functions.ipynb',
 'Python Introduction.ipynb',
 'Python Learning Resources.ipynb',
 'Python Lists.ipynb',
 'Python Lists_in_class_14.05.2019.ipynb',
 'Python Modules and Imports.ipynb',
 'Python Sets in class 05.18.2019.ipynb',
 'Python Sets.ipynb',
 'Python Strings.ipynb',
 'Python Strings_in_class_14.05.2019.ipynb',
 'Python Tuples.ipynb',
 'Python Variables and Data Types.ipynb',
 'Python Variables and Data Types_in_class_13.05.2019.ipynb',
 'Python_List_Exercise_1.ipynb']

In [42]:
ipyth=glob.glob('*Python*.ipynb')
ipyth

['RCS Python Classes.ipynb',
 'RCS Python File IO.ipynb',
 'RCS Python File Operations.ipynb',
 'RCS Python Functions.ipynb',
 'RCS Python Importing Modules.ipynb',
 'RCS Python Math NumPy SymPy.ipynb',
 'RCS Python Modules.ipynb']

In [31]:
ifile2=glob.glob('*File*.*,*.md')
ifile2

[]

### New in version 3.4.

For example, consider a directory containing the following files: 1.gif, 2.txt, card.gif and a subdirectory sub which contains only the file 3.txt. glob() will produce the following results. Notice how any leading components of the path are preserved.

In [32]:
glob.glob('./*V*.*')

['.\\geckodriver.log',
 '.\\RCS Python CSV files.ipynb',
 '.\\RCS Python Overloading.ipynb',
 '.\\RCS Python pip and virtualenv.ipynb',
 '.\\RCS Python Web Development with Flask.ipynb',
 '.\\RCS Variables and Data Types.ipynb']

In [45]:
! mkdir Text # ! is Jupyter command for running OS commands b

# File Operations directly from Python

https://docs.python.org/3/library/subprocess.html#module-subprocess

subprocess.run(args, *, stdin=None, input=None, stdout=None, stderr=None, shell=False, cwd=None, timeout=None, check=False, encoding=None, errors=None)

In [47]:
%pwd

'C:\\Users\\vsd\\Documents\\Github\\RCS_Python'

In [33]:
import sys
sys.path

['',
 'C:\\ProgramData\\Anaconda3\\python36.zip',
 'C:\\ProgramData\\Anaconda3\\DLLs',
 'C:\\ProgramData\\Anaconda3\\lib',
 'C:\\ProgramData\\Anaconda3',
 'C:\\ProgramData\\Anaconda3\\lib\\site-packages',
 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\win32',
 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\win32\\lib',
 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\Pythonwin',
 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\extensions',
 'C:\\Users\\vsd\\.ipython']

In [48]:
import subprocess
print(subprocess.run("calculator", shell=True, stdout=subprocess.PIPE))

CompletedProcess(args='calculator', returncode=1, stdout=b'')


In [34]:
import subprocess
print(subprocess.run("dir", shell=True, stdout=subprocess.PIPE)) ## Without pipe we will get no output

CompletedProcess(args='dir', returncode=0, stdout=b' Volume in drive C is New Volume\r\n Volume Serial Number is 8A1C-6B07\r\n\r\n Directory of C:\\Users\\vsd\\Documents\\Github\\RCS_Python\r\n\r\n06/28/2018  05:40 PM    <DIR>          .\r\n06/28/2018  05:40 PM    <DIR>          ..\r\n06/28/2018  04:46 PM    <DIR>          .ipynb_checkpoints\r\n05/21/2018  03:59 PM    <DIR>          .vscode\r\n05/23/2018  05:25 PM           274,038 altum.pdf\r\n05/21/2018  04:04 PM                72 arg.py\r\n05/21/2018  04:26 PM               456 argopt.py\r\n05/21/2018  04:11 PM               248 argpos.py\r\n05/23/2018  04:21 PM            96,129 combined.pdf\r\n05/14/2018  05:51 PM               427 fibbo.py\r\n05/14/2018  05:40 PM               347 fibo.py\r\n05/28/2018  06:08 PM            15,622 geckodriver.log\r\n05/21/2018  03:56 PM             4,349 Generate Password.ipynb\r\n06/13/2018  03:05 PM             1,105 Git_Workflow.md\r\n05/19/2018  01:15 PM             1,301 gp.py\r\n05/14/2018  

In [49]:
print(subprocess.run("C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"))

CompletedProcess(args='C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe', returncode=0)


In [36]:
print(subprocess.run("chrome.exe"))

FileNotFoundError: [WinError 2] The system cannot find the file specified

In [37]:
subprocess.run(["mkdir", "testdir"], shell=True, stdout=subprocess.PIPE)

CompletedProcess(args=['mkdir', 'testdir'], returncode=0, stdout=b'')

In [38]:
import sys


In [13]:
! dir

 Volume in drive C is CODE
 Volume Serial Number is AC66-C9D3

 Directory of C:\Users\vdell\Documents\Github\RCS_Python

05/14/2018  10:13 AM    <DIR>          .
05/14/2018  10:13 AM    <DIR>          ..
05/14/2018  10:05 AM    <DIR>          .ipynb_checkpoints
05/10/2018  10:05 AM               411 Git_Workflow.md
05/11/2018  10:38 AM                67 mytext.txt
05/11/2018  11:39 AM               135 numbers.txt
05/10/2018  10:05 AM            17,905 RCS Flow Control.ipynb
05/11/2018  02:16 PM             4,623 RCS Python Challenge.ipynb
05/14/2018  09:55 AM            36,091 RCS Python Classes.ipynb
05/14/2018  09:55 AM            12,185 RCS Python File IO.ipynb
05/14/2018  10:11 AM            10,112 RCS Python File Operations.ipynb
05/11/2018  10:02 AM            14,087 RCS Python Functions.ipynb
05/14/2018  09:55 AM             2,315 RCS Python Importing Modules.ipynb
05/14/2018  09:55 AM            16,686 RCS Python Math NumPy SymPy.ipynb
05/14/2018  09:55 AM            12,806 RC

In [14]:
%%writefile ./Text/Test.txt
Just a simple text file
Nothing special

Writing ./Text/Test.txt


In [15]:
! dir Text

 Volume in drive C is CODE
 Volume Serial Number is AC66-C9D3

 Directory of C:\Users\vdell\Documents\Github\RCS_Python\Text

05/14/2018  10:14 AM    <DIR>          .
05/14/2018  10:14 AM    <DIR>          ..
05/14/2018  10:14 AM                40 Test.txt
               1 File(s)             40 bytes
               2 Dir(s)  74,738,360,320 bytes free


In [39]:
glob.glob('**/*.txt', recursive=True) #We should also get subdirectory name

['numbers.txt',
 'somefile.txt',
 'resources\\cleaned.txt',
 'resources\\Veidenbaums.txt']

In [41]:
glob.glob('./*.md')

['.\\Git_Workflow.md', '.\\README.md']

In [9]:
glob.glob('./?.md') # requires a single char only so we wont get a match for longer file names

[]

In [35]:
meta = os.stat('README.md')
print(type(meta),meta) # os.stat returns a class containing file meta information

<class 'os.stat_result'> os.stat_result(st_mode=33206, st_ino=5348024557889930, st_dev=609827677, st_nlink=1, st_uid=0, st_gid=0, st_size=207, st_atime=1526155447, st_mtime=1526155447, st_ctime=1526155447)


In [33]:
import time
time.localtime(meta.st_mtime) #mtime last modified tiem

time.struct_time(tm_year=2018, tm_mon=5, tm_mday=12, tm_hour=23, tm_min=4, tm_sec=7, tm_wday=5, tm_yday=132, tm_isdst=1)

In [None]:
## Homework 2 for file operations
# Process 'resources\\cleaned.txt',
# Generate a dictionary of words and their frequency - "Un" : 76
# Save this dictionary in a text file, each word and frequency in a new line
# Bonus for pickling the dictionary