## Working with Text Files

In [None]:
# Optional third-party packages used by the PDF and speech sections.
# Uncomment to install; `%pip` installs into the running kernel's environment.
# %pip install PyPDF2==3.0.1
# %pip install SpeechRecognition==3.10.4
# %pip install pyaudio==0.2.14

### 1. Writing Files with `open()` in Write and Append Modes
- **Opening and Writing to Files**: Learn how to create and write to text files.
- **Appending to Files**: Understand how to append content to an existing file.


In [4]:
import os

# Create the output directory used by the file-writing cells.
# `exist_ok=True` already makes this a no-op when the directory exists, so a
# separate os.path.exists() check is redundant (and racy).
os.makedirs('data', exist_ok=True)

In [8]:
# Manual open/close variant: the handle has to be closed explicitly, so the
# writes sit in try/finally to guarantee close() runs even if a write fails.
fp = open('data/example.txt', 'w')
try:
    fp.write('this is some content\n')
    fp.write('this is another content')
finally:
    fp.close()

In [9]:
# Context-manager variant: the file is closed automatically when the block exits.
with open('data/example1.txt', 'w') as fp:
    fp.writelines([
        'this is some content\n',
        'this is another content',
    ])

In [11]:
lines = ['first line', 'second line', 'third line']

# Open the file once in append mode instead of re-opening it for every entry.
# No newline is written between entries, so they land on a single line, and
# because the mode is 'a' every re-run of this cell appends the text again.
with open('data/example2.txt', 'a') as fp:
    for line in lines:
        fp.write(line)

In [14]:
lines = ['first line', 'second line', 'third line']

# The entries carry no trailing newline, so one is appended to each while writing.
with open('data/example3.txt', 'w') as fp:
    for entry in lines:
        fp.write(entry + '\n')


In [16]:
# Alternative to writing a separate '\n' after each entry: the entries below
# already end with '\n', and writelines() writes them as-is.
# lines = ['first line', 'second line', 'third line']
lines = ['first line\n', 'second line\n', 'third line\n']

# Mode 'w' overwrites data/example3.txt if it already exists.
with open('data/example3.txt', 'w') as fp:
    fp.writelines(lines)

In [20]:
lines = ['first line', 'second line', 'third line']

# Join with '\n' between entries, so the text has no trailing newline.
text = "\n".join(lines)

with open('data/example4.txt', 'w') as fp:
    print(text, end='', file=fp)

In [23]:
lines = ['fourth line', 'fifth line', 'sixth line']
text = "\n".join(lines)

# Append to the existing file; the leading '\n' starts the new text on its own line.
with open('data/example4.txt', 'a') as fp:
    fp.write('\n' + text)

### 2. Read and Evaluate the Files
- **Reading Files**: Learn how to read text files and evaluate their content.
- **Evaluating Expressions from Files**: Using `eval()` to interpret data. Only do this with files you trust, because `eval()` executes whatever code the text contains.


In [28]:
# Read the whole file into one string. The context manager closes the handle,
# which the bare open() call never did.
with open('data/example.txt', 'r') as fp:
    text = fp.read()
print(text)
# splitlines() drops the line terminators and returns one entry per line.
text.splitlines()

this is some content
this is another content


['this is some content', 'this is another content']

In [30]:
# Split explicitly on '\n'; for this text the recorded result matches splitlines().
text.split('\n')

['this is some content', 'this is another content']

In [31]:
# readlines() returns a list of lines that keep their trailing '\n'.
# The context manager closes the handle, which the bare open() call never did.
with open('data/example.txt', 'r') as fp:
    text = fp.readlines()
text

['this is some content\n', 'this is another content']

In [33]:
# Open the file and read only its first line. The handle is deliberately left
# open here: the following cell reads from the same `fp` and then closes it.
fp = open('data/example.txt', 'r')
fp.readline()

'this is some content\n'

In [35]:
# Read the next line from the still-open handle `fp`, then close it.
# The line is kept in a variable and placed last so the cell displays it;
# with fp.close() as the final expression the value read was silently discarded.
second_line = fp.readline()
fp.close()
second_line

In [36]:
# Read the full content inside the context manager, split it into lines afterwards.
with open('data/example.txt', 'r') as fp:
    raw_text = fp.read()

text = raw_text.splitlines()
text

['this is some content', 'this is another content']

In [38]:
# Two lines are written: the unevaluated text '2+2', and the string form of an
# already-computed float.
with open('data/expression.txt', 'w') as fp:
    fp.writelines(['2+2', '\n', str(2+2.45)])

In [41]:
# Read each line of the file and evaluate it as a Python expression.
# WARNING(review): eval() executes arbitrary code, so this is only acceptable
# because the file was written by this notebook; never use it on untrusted files.
with open('data/expression.txt', 'r') as fp:
    text = fp.read().splitlines()
    # `text` is rebound from a list of strings to a list of evaluated values.
    text = [eval(x) for x in text]
text

[4, 4.45]

In [43]:
# The integers 1 through 99 as a plain list.
d1 = [*range(1, 100)]

In [44]:
# Persist the list as its Python text representation (a list literal).
serialized = str(d1)
with open('data/expression_list.txt', 'w') as fp:
    fp.write(serialized)

In [45]:
# Read the stored text back; at this point d11 is a single string, not a list.
with open('data/expression_list.txt', 'r') as fp:
    d11 = fp.read()

In [47]:
# The file content comes back as text.
type(d11)

str

In [48]:
# Slice of the original in-memory list, for comparison with the round-tripped copy.
d1[10:20]

[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

In [50]:
import ast

# The file holds the text of a list literal, so ast.literal_eval() can rebuild
# the list without executing code (unlike eval()).
# The isinstance guard keeps the cell re-runnable once d11 is already a list.
if isinstance(d11, str):
    d11 = ast.literal_eval(d11)
type(d11)

list

In [51]:
# Same slice on the parsed copy; the recorded output matches the slice of d1.
d11[10:20]

[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

### 3. Reading and Writing `.CSV` and `.TSV` Files with Pandas
- **Reading CSV/TSV Files**: Learn how to load data from `CSV/TSV` files using Pandas.
- **Writing Data to CSV/TSV**: Save DataFrames to `CSV/TSV` files.


In [52]:
import pandas as pd

In [58]:
# Load the plain-text file with the CSV reader; header=None keeps the first
# line as data instead of using it as column names.
# NOTE(review): presumably this yields one column only because the lines
# contain no commas - confirm before reusing on arbitrary text.
df = pd.read_csv('data/example.txt', header=None)

# Column 0 as a plain Python list.
text = df[0].tolist()

In [59]:
# Display the list built from column 0.
text

['this is some content', 'this is another content']

In [62]:
# Same loader applied to data/example1.txt, this time displaying the frame.
# NOTE(review): the recorded output shows three columns, which does not match
# the comma-free content this notebook writes to example1.txt - the file on
# disk presumably differed when the cell was run; confirm.
df = pd.read_csv('data/example1.txt', header=None)

# text = df[0].tolist()
# text
df

Unnamed: 0,0,1,2
0,this is some content,this is,another content


In [65]:
# Read a CSV straight from a URL; nrows=10 limits the load to the first 10 data rows.
url = 'https://github.com/laxmimerit/All-CSV-ML-Data-Files-Download/raw/master/jamesbond.csv'
df = pd.read_csv(url, nrows=10)

In [69]:
# TSV files go through the same reader with a tab separator.
url = 'https://github.com/laxmimerit/All-CSV-ML-Data-Files-Download/raw/master/moviereviews.tsv'
df = pd.read_csv(url, nrows=10, sep='\t')
df

Unnamed: 0,label,review
0,neg,how do films like mouse hunt get into theatres...
1,neg,some talented actresses are blessed with a dem...
2,pos,this has been an extraordinary year for austra...
3,pos,according to hollywood movies made in last few...
4,neg,my first press screening of 1998 and already i...
5,neg,"to put it bluntly , ed wood would have been pr..."
6,neg,"synopsis : melissa , a mentally-disturbed woma..."
7,neg,tim robbins and martin lawernce team up in thi...
8,neg,"in "" gia "" , angelina jolie plays the titular ..."
9,neg,"in 1990 , the surprise success an unheralded l..."


In [72]:
# usecols restricts the load to the listed columns (here only 'review').
url = 'https://github.com/laxmimerit/All-CSV-ML-Data-Files-Download/raw/master/moviereviews.tsv'
df = pd.read_csv(url, sep='\t', usecols=['review'])
# df

In [75]:
# With chunksize set, read_csv returns a reader object (a TextFileReader in the
# recorded output) rather than a DataFrame; `df` is therefore not a frame here.
url = 'https://github.com/laxmimerit/All-CSV-ML-Data-Files-Download/raw/master/IMDB-Dataset.csv'
df = pd.read_csv(url, chunksize=9_000)
df

<pandas.io.parsers.readers.TextFileReader at 0x254160f5220>

In [76]:
# Iterate the reader and print each chunk's shape; the recorded run shows five
# chunks of 9,000 rows followed by a final chunk of 5,000.
# NOTE(review): the reader is presumably exhausted after this loop, which is
# why a later cell calls read_csv again - confirm.
for chunk in df:
    print(chunk.shape)

(9000, 2)
(9000, 2)
(9000, 2)
(9000, 2)
(9000, 2)
(5000, 2)


In [92]:
url = 'https://github.com/laxmimerit/All-CSV-ML-Data-Files-Download/raw/master/IMDB-Dataset.csv'

# Stream the CSV in 9,000-row chunks and combine them with a single concat call.
# Calling pd.concat inside the loop re-copies every previously accumulated row
# on each iteration; one call over all chunks avoids that.
# The context manager closes the underlying reader when done.
with pd.read_csv(url, chunksize=9_000) as reader:
    data = pd.concat(reader)

In [94]:
# Save as CSV without the row index; `index` is a boolean flag, so pass False
# explicitly rather than relying on None being falsy.
data.to_csv('data/imdb_movie.csv', index=False)

In [95]:
# Save as TSV (tab separator) without the row index; `index` is a boolean flag,
# so pass False explicitly rather than relying on None being falsy.
data.to_csv('data/imdb_movie.tsv', index=False, sep='\t')

### 4. Reading and Writing `.XLSX` Files with Pandas
- **Working with Excel Files**: Learn how to read and write Excel files using Pandas.
- **Multiple Sheets**: Handle Excel files with multiple sheets.


In [96]:
# Remote workbook used by the Excel cells; `url` is rebound from the CSV link.
url = 'https://github.com/laxmimerit/All-CSV-ML-Data-Files-Download/raw/master/Data%20-%20Multiple%20Worksheets.xlsx'

In [97]:
# Load the workbook with default settings (no sheet_name given).
df = pd.read_excel(url)


In [98]:
# Display the frame loaded from the workbook.
df

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [99]:
# sheet_name=None loads every sheet; the result is indexed by sheet name in the
# next cell, so `df` is a mapping of frames here, not a single DataFrame.
df = pd.read_excel(url, sheet_name=None)

In [102]:
# NOTE(review): the result of items() is not displayed or stored because it is
# not the last expression of the cell - kept as in the original.
df.items()

# Pull the individual sheets out by name.
df1 = df['Data 1']
df2 = df['Data 2']

In [106]:
# Reload every sheet, keeping only the 'First Name' and 'City' columns, and
# rebind df1/df2 to the reduced frames.
df = pd.read_excel(url, sheet_name=None, usecols=['First Name', 'City'])
df1 = df['Data 1']
df2 = df['Data 2']

In [109]:
# Write one frame to its own workbook without the row index; `index` is a
# boolean flag, so pass False explicitly rather than relying on None being falsy.
df1.to_excel('data/df1.xlsx', index=False)

In [119]:
# Write both frames into one workbook, one sheet per frame.
# NOTE(review): no `index` argument is passed here, so the default index
# handling applies to these sheets - confirm that is intended.
with pd.ExcelWriter('data/multiple_sheet.xlsx') as writer:
    for sheet_title, frame in (('data 1', df1), ('data 2', df2)):
        frame.to_excel(writer, sheet_name=sheet_title)

### 5. Reading and Writing `.json` Files
- **Handling JSON Data**: Learn how to read and write JSON files using Pandas.
- **Nested JSON**: Work with nested JSON structures.


In [122]:
import json

In [125]:
# Export the frame to JSON using the 'records' orientation.
df1.to_json('data/output.json', orient='records')

In [126]:
# Same export with lines=True; this file is read back with lines=True in a later cell.
df1.to_json('data/output1.json', orient='records', lines=True)

In [131]:
# Parse the JSON file with the standard library. The context manager closes the
# handle, which json.load(open(...)) left open.
with open('data/output.json') as fp:
    d1 = json.load(fp)
pd.DataFrame(d1)

Unnamed: 0,First Name,City
0,Brandon,Miami
1,Sean,Denver
2,Judy,Los Angeles
3,Ashley,San Francisco
4,Stephanie,Portland


In [132]:
# Load the same file directly with pandas; `d1` is rebound to the resulting frame.
d1 = pd.read_json('data/output.json')
d1

Unnamed: 0,First Name,City
0,Brandon,Miami
1,Sean,Denver
2,Judy,Los Angeles
3,Ashley,San Francisco
4,Stephanie,Portland


In [134]:
# The file written with lines=True is read back with lines=True.
d2 = pd.read_json('data/output1.json', lines=True)
d2

Unnamed: 0,First Name,City
0,Brandon,Miami
1,Sean,Denver
2,Judy,Los Angeles
3,Ashley,San Francisco
4,Stephanie,Portland


In [135]:
# Nested JSON-like record: a passenger that contains a nested ticket object.
nested_data = dict(
    passenger=dict(
        name='bob',
        age=30,
        ticket=dict(number='2345hfd', price=123),
    ),
)

In [136]:
# Flatten the nested dict into one row; in the recorded output the nested keys
# become dot-separated column names such as 'passenger.ticket.price'.
pd.json_normalize(nested_data)

Unnamed: 0,passenger.name,passenger.age,passenger.ticket.number,passenger.ticket.price
0,bob,30,2345hfd,123


### 6. Extract Text Data from PDF
- **Working with PDFs**: Extract text data from PDF files.
- **Handling Multiple Pages**: Extract text from multi-page PDFs.


In [141]:
# !pip install PyPDF2==3.0.1

In [1]:
import PyPDF2

In [15]:
# Extract the text of the first page only (pages are indexed from 0).
with open('data/BERT.pdf', 'rb') as fp:
    reader = PyPDF2.PdfReader(fp)
    first_page = reader.pages[0]
    text = first_page.extract_text()

In [20]:
# Collect the extracted text of every page, in document order.
with open('data/BERT.pdf', 'rb') as fp:
    reader = PyPDF2.PdfReader(fp)
    pages = [page.extract_text() for page in reader.pages]

In [22]:
# One newline between the text of consecutive pages.
text = "\n".join(pages)

# Write as UTF-8 explicitly: text extracted from a PDF can contain characters
# that the platform default encoding cannot represent, which would make
# write() raise UnicodeEncodeError.
with open('data/bert.txt', 'w', encoding='utf-8') as fp:
    fp.write(text)

In [23]:
# Extract only pages 1-3 (1-based page numbers) instead of walking every page
# of the document and testing each index for membership.
pages = []
with open('data/BERT.pdf', 'rb') as fp:
    reader = PyPDF2.PdfReader(fp)
    page_count = len(reader.pages)
    for page_number in (1, 2, 3):
        # Stop early if the document has fewer pages than requested.
        if page_number > page_count:
            break
        pages.append(reader.pages[page_number - 1].extract_text())

text = "\n".join(pages)

# Write as UTF-8 explicitly: text extracted from a PDF can contain characters
# that the platform default encoding cannot represent.
with open('data/bert123.txt', 'w', encoding='utf-8') as fp:
    fp.write(text)

### 7. Record the Audio and Convert to Text
- **Recording Audio**: Learn how to record audio using Python.
- **Converting Audio to Text**: Use speech recognition to transcribe audio.

In [None]:
# !pip install SpeechRecognition==3.10.4
# !pip install pyaudio==0.2.14

In [24]:
import pyaudio

In [25]:
# Create a PyAudio instance and ask how many audio devices it reports.
# NOTE(review): the name `audio` is rebound to the recorded audio in a later
# cell, and this PyAudio instance is never terminated in the visible code.
audio = pyaudio.PyAudio()
audio.get_device_count()

34

In [27]:
# Fetch the info record for every device index reported by PyAudio.
# Nothing is displayed while the print below stays commented out.
for i in range(audio.get_device_count()):
    info = audio.get_device_info_by_index(i)
    # print(info)

In [28]:
import speech_recognition as sr

In [29]:
# Capture speech from the microphone with listen(); no duration is given here,
# in contrast to the fixed-length record() call used in a later cell.
# `audio` is rebound from the PyAudio instance to the captured audio.
recognizer = sr.Recognizer()
with sr.Microphone() as source:
    print('Say something...')
    audio = recognizer.listen(source)

Say something...


In [31]:
# Transcribe the captured audio with recognize_google().
# NOTE(review): presumably this calls an online service and can raise on
# unintelligible audio or network failure - no handling here; confirm.
text = recognizer.recognize_google(audio)
print(text)

so I am running my recognition here and this recognizer is running here I have to stop for some time so that recognizer can stop here alright so I am just going to stop and recognizer will now stop recording


In [33]:
import os
# Create the folder for recordings; exist_ok=True makes the cell safe to re-run.
os.makedirs('audio', exist_ok=True)

In [34]:
# Serialise the captured audio to WAV bytes, then store them in binary mode.
wav_bytes = audio.get_wav_data()
with open('audio/record1.wav', 'wb') as fp:
    fp.write(wav_bytes)

In [35]:
# Record a fixed 5-second clip from the microphone.
with sr.Microphone() as source:
    print('Recording only for 5 seconds...')
    audio = recognizer.record(source, duration=5)

# Transcribe only after the microphone has been released: the recognition call
# does not need the device, so it should not run inside the `with` block.
# This matches how the file-based cells in this notebook are structured.
text = recognizer.recognize_google(audio)
print('Here is your command')
print(text)

Recording only for 5 seconds...
Here is your command
open some folder and


In [36]:
## read audio and then convert into text data

In [38]:
# Load the entire saved recording from disk (no duration limit).
with sr.AudioFile('audio/record1.wav') as fp:
    audio_data = recognizer.record(fp)

# Transcribe it and print the result under a heading line.
text = recognizer.recognize_google(audio_data)
print('Here is your command', text, sep='\n')

Here is your command
so I am running my recognition here and this recognizer is running here I have to stop for some time so that recognizer can stop here alright so I am just going to stop and recognizer will now stop recording


In [39]:
# Load only the first 5 seconds of the saved recording.
with sr.AudioFile('audio/record1.wav') as fp:
    audio_data = recognizer.record(fp, duration=5)

# Transcribe the clip and print the result under a heading line.
text = recognizer.recognize_google(audio_data)
print('Here is your command', text, sep='\n')

Here is your command
so I am running my recognition here and this recognizer
