In [1]:
# open() returns a text stream object; we deliberately open it by hand here
# (no with-block) so that we can look at what closing means further down.
# The encoding is given explicitly so the result does not depend on the platform default.
fstream = open('README.md', encoding='utf-8')
type(fstream)

_io.TextIOWrapper

In [2]:
# read() without a size argument returns everything from the current position
# to the end of the file as one single string
text = fstream.read() # we can read everything
text

'## Python Core Language \n\nIncluding some standard libraries\n\n### Official Documentation\n\n* https://docs.python.org/3/\n'

In [5]:
# the first read() left the stream position at the end of the file,
# so a second read() has nothing left to return and gives an empty string
moretext = fstream.read()
moretext # so why do we get nothing the second time?

''

In [6]:
# we need to reset the stream position to the start if we want to read the stream again
fstream.seek(0)
# readlines() returns a list with one string per line, each keeping its trailing '\n'
textlines = fstream.readlines()
textlines

['## Python Core Language \n',
 '\n',
 'Including some standard libraries\n',
 '\n',
 '### Official Documentation\n',
 '\n',
 '* https://docs.python.org/3/\n']

In [7]:
# the file was opened without a with-block, so nothing closes it for us
fstream.close() # so we would need to remember to close manually

In [9]:
# Reading from a closed file raises ValueError. The error is the point of this
# cell, so we catch it and print it: the message is still shown, but
# "Restart Kernel -> Run All" no longer stops here.
try:
    fstream.read()
except ValueError as err:
    print(f"ValueError: {err}") # good we can not read a closed file

ValueError: I/O operation on closed file.

In [10]:
# The with-statement closes the file for us as soon as the indented block ends,
# also when an exception is raised inside it.
# The encoding is given explicitly so the result does not depend on the platform default.
with open('README.md', encoding='utf-8') as f:
    mylines = f.readlines()
# file will be closed here
print("File is closed already here")
mylines

File is closed already here


['## Python Core Language \n',
 '\n',
 'Including some standard libraries\n',
 '\n',
 '### Official Documentation\n',
 '\n',
 '* https://docs.python.org/3/\n']

In [11]:
# usually you would open a CSV file with a library such as csv or pandas (which we will look at in a week or so)
# for difficult path manipulation we would use pathlib (or os.path)
# here we have a relative data path: we go up one level and then into the sibling folder data
# The encoding is explicit so that non-ASCII cells are decoded the same way on every machine
# (assumes the mock data file is stored as UTF-8 - confirm if decoding fails)
with open('../data/MOCK_DATA_A24.csv', encoding='utf-8') as fstream:
    mlines = fstream.readlines()
print("Got mlines in length:", len(mlines))
    

Got mlines in length: 1001


In [12]:
# readlines() gave us a plain Python list (one string per line)
type(mlines)

list

In [13]:
# look at the first five entries only: the header row plus four data rows
mlines[:5]

['id,first_name,last_name,email,gender,ip_address,age\n',
 '1,Orelie,Oxbe,ooxbe0@chicagotribune.com,Female,104.158.76.212,71\n',
 '2,Elke,Vasyutin,evasyutin1@mail.ru,Female,75.248.31.227,70\n',
 '3,Godfrey,Bengtsen,gbengtsen2@live.com,Male,184.20.158.49,55\n',
 '4,Dolly,Caitlin,dcaitlin3@google.com,Female,137.157.89.124,81\n']

In [None]:
# Next step: how to transform these lines into a list of tuples of individual cells

In [14]:
# index 0 is the header row, so index 1 is the first data line
fline = mlines[1]
fline

'1,Orelie,Oxbe,ooxbe0@chicagotribune.com,Female,104.158.76.212,71\n'

In [15]:
# strip() returns a copy without leading/trailing whitespace - here that removes the trailing '\n'
fline.strip()

'1,Orelie,Oxbe,ooxbe0@chicagotribune.com,Female,104.158.76.212,71'

In [17]:
# split(',') cuts the line into a list of cell strings, tuple() turns that list into a tuple
tuple(fline.strip().split(','))

('1',
 'Orelie',
 'Oxbe',
 'ooxbe0@chicagotribune.com',
 'Female',
 '104.158.76.212',
 '71')

In [18]:
# Classical loop: convert the raw lines one by one into tuples of cell values
newlist = []
for line in mlines:
    # trim surrounding whitespace (including the trailing newline), then cut on commas
    cells = line.strip().split(',')
    newline = tuple(cells)
    newlist.append(newline)
newlist[:5]

[('id', 'first_name', 'last_name', 'email', 'gender', 'ip_address', 'age'),
 ('1',
  'Orelie',
  'Oxbe',
  'ooxbe0@chicagotribune.com',
  'Female',
  '104.158.76.212',
  '71'),
 ('2',
  'Elke',
  'Vasyutin',
  'evasyutin1@mail.ru',
  'Female',
  '75.248.31.227',
  '70'),
 ('3',
  'Godfrey',
  'Bengtsen',
  'gbengtsen2@live.com',
  'Male',
  '184.20.158.49',
  '55'),
 ('4',
  'Dolly',
  'Caitlin',
  'dcaitlin3@google.com',
  'Female',
  '137.157.89.124',
  '81')]

In [19]:
# we can do the same as the classical loop in a single line with a list comprehension
nlist = [tuple(line.strip().split(',')) for line in mlines]
nlist[:3]

[('id', 'first_name', 'last_name', 'email', 'gender', 'ip_address', 'age'),
 ('1',
  'Orelie',
  'Oxbe',
  'ooxbe0@chicagotribune.com',
  'Female',
  '104.158.76.212',
  '71'),
 ('2',
  'Elke',
  'Vasyutin',
  'evasyutin1@mail.ru',
  'Female',
  '75.248.31.227',
  '70')]

In [20]:
# nlist[0] is the header row, so nlist[2] is the second person (id '2')
secondperson = nlist[2]
secondperson

('2',
 'Elke',
 'Vasyutin',
 'evasyutin1@mail.ru',
 'Female',
 '75.248.31.227',
 '70')

In [21]:
# index 3 is the email column (compare with the header row)
secondperson[3]

'evasyutin1@mail.ru'

In [22]:
# endswith() is only True when the string finishes with '.ru'
secondperson[3].endswith('.ru')

True

In [23]:
'.ru' in secondperson[3] # not as precise, since the substring could be anywhere in the string

True

In [24]:
# keep only the rows whose email (index 3) ends with '.ru';
# the header row drops out as well, because 'email' does not end with '.ru'
# (a .ru address is only a rough stand-in for "from Russia")
fromrussia = [person for person in nlist if person[3].endswith('.ru')]
fromrussia

[('2',
  'Elke',
  'Vasyutin',
  'evasyutin1@mail.ru',
  'Female',
  '75.248.31.227',
  '70'),
 ('12',
  'Augustin',
  'Blasdale',
  'ablasdaleb@google.ru',
  'Male',
  '180.108.170.23',
  '14'),
 ('94',
  'Kerianne',
  'Tonkin',
  'ktonkin2l@yandex.ru',
  'Female',
  '55.242.207.177',
  '67'),
 ('107', 'Poppy', 'Rush', 'prush2y@ucoz.ru', 'Female', '107.83.83.211', '38'),
 ('248',
  'Oralia',
  'Robroe',
  'orobroe6v@liveinternet.ru',
  'Female',
  '26.65.250.246',
  '75'),
 ('336',
  'Ebony',
  'Parton',
  'eparton9b@ucoz.ru',
  'Female',
  '214.149.5.160',
  '14'),
 ('385',
  'Renard',
  'Swanston',
  'rswanstonao@vkontakte.ru',
  'Male',
  '121.175.32.29',
  '66'),
 ('403',
  'Ardith',
  'Crocombe',
  'acrocombeb6@yandex.ru',
  'Female',
  '104.139.29.144',
  '25'),
 ('417',
  'Viki',
  'Blemen',
  'vblemenbk@vkontakte.ru',
  'Female',
  '209.252.244.79',
  '62'),
 ('452',
  'Minetta',
  'Julien',
  'mjuliencj@rambler.ru',
  'Female',
  '160.50.152.128',
  '10'),
 ('454',
  'Manda',

In [25]:
# how many rows passed the '.ru' filter
len(fromrussia)

21

In [27]:
# how to filter only female users from russia (we already have fromrussia list)
# "in" is a substring test on the gender cell (index 4): any value containing "Female" passes
femrussia = [person for person in fromrussia if "Female" in person[4]] 
len(femrussia)

14

In [28]:
# how to filter only female users from russia (we already have fromrussia list)
frussia = [person for person in fromrussia if person[4] == "Female"] # stricter check: the cell must be exactly "Female"
len(frussia)

14

In [29]:
# take the first row of the filtered list to experiment with
ffem = frussia[0]
ffem

('2',
 'Elke',
 'Vasyutin',
 'evasyutin1@mail.ru',
 'Female',
 '75.248.31.227',
 '70')

In [30]:
# index -1 is the last cell of the row, the age - still a string at this point
ffem[-1]

'70'

In [31]:
# int() converts the age string into a number we can calculate with
int(ffem[-1])

70

In [32]:
# convert the last cell (age) of every row in frussia to an int
ages = [int(f[-1]) for f in frussia]
ages

[70, 67, 38, 75, 14, 25, 62, 10, 67, 94, 35, 59, 77, 29]

In [33]:
# arithmetic mean of the ages (would raise ZeroDivisionError if ages were empty)
sum(ages)/len(ages)

51.57142857142857

In [34]:
# largest value in ages
max(ages)

94

In [35]:
# smallest value in ages
min(ages)

10

In [42]:
# turn every tuple back into one text line: join the cells and end the line with '\n'
# NOTE(review): the separator is ", " (comma + space), so every cell after the first
# starts with a space - unlike the comma-only input file; frussia also has no header row
fstrings = [", ".join(line)+"\n" for line in frussia]
fstrings[:3]

['2, Elke, Vasyutin, evasyutin1@mail.ru, Female, 75.248.31.227, 70\n',
 '94, Kerianne, Tonkin, ktonkin2l@yandex.ru, Female, 55.242.207.177, 67\n',
 '107, Poppy, Rush, prush2y@ucoz.ru, Female, 107.83.83.211, 38\n']

In [43]:
# mode='w' creates the file, or overwrites it if it already exists.
# The encoding is given explicitly so the written bytes do not depend on the platform default.
with open('../data/results.csv', mode='w', encoding='utf-8') as fstream:
    # writelines() adds no line separators itself; every string in fstrings already ends with '\n'
    fstream.writelines(fstrings)