# String Formatter

In [1]:
name = 'KGB Talkie'

In [3]:
print('The Youtube channel is {}'.format(name))

The Youtube channel is KGB Talkie


In [7]:
print(f'The Youtube channel is {name}')

The Youtube channel is KGB Talkie


In [8]:
# Minimum width and alignment between columns
# lets say we have to colums
# day value
# 1    10
# 10   11

In [9]:
data_science_tuts = [('Python for beginners', 19),
                    ('Feature selection for machine learning',10),
                    ('Machine Learning Tutorials', 11),
                    ('Deep learning Tutorials', 19)]

In [11]:
data_science_tuts

[('Python for beginners', 19),
 ('Feature selection for machine learning', 10),
 ('Machine Learning Tutorials', 11),
 ('Deep learning Tutorials', 19)]

In [13]:
for info in data_science_tuts:
    print(info)

('Python for beginners', 19)
('Feature selection for machine learning', 10)
('Machine Learning Tutorials', 11)
('Deep learning Tutorials', 19)


In [16]:
# aligning the two collumns
for info in data_science_tuts:
    print(f'{info[0]:{50}} {info[1]:{10}}')

Python for beginners                                       19
Feature selection for machine learning                     10
Machine Learning Tutorials                                 11
Deep learning Tutorials                                    19


In [18]:
for info in data_science_tuts:
    print(f'{info[0]:>{50}} {info[1]:{10}}')

                              Python for beginners         19
            Feature selection for machine learning         10
                        Machine Learning Tutorials         11
                           Deep learning Tutorials         19


In [21]:
for info in data_science_tuts:
    print(f'{info[0]:^{50}} {info[1]:.>{10}}')

               Python for beginners                ........19
      Feature selection for machine learning       ........10
            Machine Learning Tutorials             ........11
             Deep learning Tutorials               ........19


### Working with .CSV or . TSV

In [29]:
import pandas as pd

data = pd.read_csv('train.tsv', sep ='\t')
data.head()

Unnamed: 0,PhraseId,SentenceId,Phrase,Sentiment
0,1,1,A series of escapades demonstrating the adage ...,1
1,2,1,A series of escapades demonstrating the adage ...,2
2,3,1,A series,2
3,4,1,A,2
4,5,1,series,2


0 - negative
1 - somewhat negative
2 - neutral
3 - somewhat positive
4 - positive

In [31]:
data.shape

(156060, 4)

In [34]:
data['Sentiment'].value_counts()

2    79582
3    32927
1    27273
4     9206
0     7072
Name: Sentiment, dtype: int64

In [35]:
pos = data[data['Sentiment']== 4]

In [38]:
pos.drop(['PhraseId', 'SentenceId'],axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [40]:
pos.to_csv('pos.tsv', sep= '\t', index = False)

In [42]:
pd.read_csv('pos.tsv', sep = '\t')

Unnamed: 0,Phrase,Sentiment
0,"This quiet , introspective and entertaining in...",4
1,"quiet , introspective and entertaining indepen...",4
2,entertaining,4
3,is worth seeking,4
4,A positively thrilling combination of ethnogra...,4
...,...,...
9201,is laughingly enjoyable,4
9202,a unique culture that is presented with univer...,4
9203,with universal appeal,4
9204,really do a great job of anchoring the charact...,4


In [44]:
# built in magic command in jupyter %% writefile 
# an easy way to write a file. Only possible in jupyter

In [45]:
%%writefile text1.txt
Hello this is an NLP lesson


Writing text1.txt


In [49]:
# appending

NameError: name 't' is not defined

In [50]:
%%writefile -a text1.txt
This is the appended tex

Appending to text1.txt


#### Using python's inbuilt command to read and write text files

In [52]:
file = open('text1.txt', 'r')

In [54]:
file

<_io.TextIOWrapper name='text1.txt' mode='r' encoding='cp1252'>

In [56]:
file.read()

''

In [74]:
# setting the pointer
file.seek(0)


0

In [65]:
file.read()

''

In [75]:
file.readline()

'Hello this is an NLP lesson\n'

In [79]:
file.seek(0)

0

In [80]:
file.readlines()

['Hello this is an NLP lesson\n', 'This is the appended tex\n']

In [81]:
# closing file
file.close()

In [83]:
file.readlines()

ValueError: I/O operation on closed file.

In [84]:
# another file which does not need to close file separately

In [85]:
with open('text1.txt') as file:
    text_data = file.readlines()
    print(text_data)

['Hello this is an NLP lesson\n', 'This is the appended tex\n']


In [87]:
for temp in text_data:
    print(temp)

Hello this is an NLP lesson

This is the appended tex



In [89]:
# to remove new lines and spaces
for temp in text_data:
    print(temp.strip())

Hello this is an NLP lesson
This is the appended tex


In [94]:
for i, temp in enumerate(text_data):
    print(str(i) + ' ---> ' +temp.strip())

0 ---> Hello this is an NLP lesson
1 ---> This is the appended tex


### file writing

In [95]:
file = open('text2.txt', 'w')

In [97]:
file

<_io.TextIOWrapper name='text2.txt' mode='w' encoding='cp1252'>

In [99]:
file.write('This is just another lesson of NLP')

34

In [101]:
# this must be done to complete the write operation
file.close()

In [103]:
# shortcut

with open('text3.txt', 'w') as file:
    file.write(' This si the file')

In [105]:
# append mode
with open('text3.txt', 'a') as file:
    file.write(' This si the file')