In [34]:
import numpy as np
import pandas as pd
import pickle
import os

# Seeds the random generator
np.random.seed(0)
# Generates a 3x4 array
array = np.random.rand(3, 4)
# Saves the array as a CSV file
np.savetxt('np.csv', array, delimiter=',')
# Reads the CSV file into a DataFrame
df = pd.read_csv('np.csv', header=None)
print(df)
# Saves the DataFrame to a new CSV file
df.to_csv('df_output.csv', index=False)


          0         1         2         3
0  0.548814  0.715189  0.602763  0.544883
1  0.423655  0.645894  0.437587  0.891773
2  0.963663  0.383442  0.791725  0.528895


In [35]:
# Seeds the random generator
np.random.seed(0)
# Generates a 365x4 array
array = np.random.rand(365, 4)
# Saves the array as a CSV file
np.savetxt('np1.csv', array, delimiter=',')
# Reads the CSV file into a DataFrame
df = pd.read_csv('np.csv', header=None)
df.to_pickle('df_pickle.pkl')
# Reads the pickle file and prints the results
df_loaded = pd.read_pickle('df_pickle.pkl')
print(df_loaded)
# Retrieves the size of the file and prints the results.
pickle_size = os.path.getsize('df_pickle.pkl')
print(f'Pickle file size: {pickle_size} bytes')


          0         1         2         3
0  0.548814  0.715189  0.602763  0.544883
1  0.423655  0.645894  0.437587  0.891773
2  0.963663  0.383442  0.791725  0.528895
Pickle file size: 749 bytes


In [36]:
# Calls on the previously created DataFrame, creates an Excel file from the data, and prints the results.
df.to_excel('array.xlsx', index=False, header=False)
df_from_excel = pd.read_excel('array.xlsx', header=None)
print(df_from_excel)


          0         1         2         3
0  0.548814  0.715189  0.602763  0.544883
1  0.423655  0.645894  0.437587  0.891773
2  0.963663  0.383442  0.791725  0.528895


In [37]:
import json

# Creates a JSON string using the provided values.
json_str = '{"country":"Netherlands","dma_code":"0","timezone":"Europe/Amsterdam","area_code":"0","ip":"46.19.37.108","asn":"AS196752","continent_code":"EU","isp":"Tilaa V.O.F.","longitude":5.75,"latitude":52.5,"country_code":"NL","country_code3":"NLD"}'
# Parses the JSON string
data = json.loads(json_str)
# Prints the value for the "Country" column
print(f'Original Country: {data["country"]}')
# Changes the name of the country from Netherlands to Fantasia.
data["country"] = "Fantasia"  # Your chosen value
# Prints the updated result
print(f'Updated Country: {data["country"]}')


Original Country: Netherlands
Updated Country: Fantasia


Using the Pandas read_json() function, we can either create a pandas Series or DataFrame – taking the JSON string from #4, create a series
Change the country value again to your choice and convert the Pandas Series to a JSON string

In [38]:
import pandas as pd

# JSON string
json_str = '{"country":"Netherlands","dma_code":"0","timezone":"Europe/Amsterdam","area_code":"0","ip":"46.19.37.108","asn":"AS196752","continent_code":"EU","isp":"Tilaa V.O.F.","longitude":5.75,"latitude":52.5,"country_code":"NL","country_code3":"NLD"}'
# Read the JSON string into a DataFrame
df = pd.read_json(json_str, typ='series')
# Change the value for the "country"
df["country"] = "Fantasia"  # Your chosen value
# Convert the Series back to JSON
json_result = df.to_json()
# Print the updated Series and JSON result
print(df)
print(json_result)


country                   Fantasia
dma_code                         0
timezone          Europe/Amsterdam
area_code                        0
ip                    46.19.37.108
asn                       AS196752
continent_code                  EU
isp                   Tilaa V.O.F.
longitude                     5.75
latitude                      52.5
country_code                    NL
country_code3                  NLD
dtype: object
{"country":"Fantasia","dma_code":"0","timezone":"Europe\/Amsterdam","area_code":"0","ip":"46.19.37.108","asn":"AS196752","continent_code":"EU","isp":"Tilaa V.O.F.","longitude":5.75,"latitude":52.5,"country_code":"NL","country_code3":"NLD"}


The below code is a direct copy of code snippets from Python Data Analysis - Second Edition (Fandango A., 2017)

In [39]:
from bs4 import BeautifulSoup
import re

soup = BeautifulSoup(open('loremIpsum.html'))
print("First div\n", soup.div)
print("First div clss", soup.div['class'])
print("First dfn text", soup.dl.dt.dfn.text)

for link in soup.findAll('a'):
    print("Link text", link.string, "URL", link.get('href'))
for i, div in enumerate(soup('div')):
    print(i, div.contents)
official_div = soup.find_all("div", id="official")
print("Official Version", official_div[0].contents[2].strip())
print("# elements with class", len(soup.find_all(class_=True)))
tile_class = soup.find_all("div", class_="tile")
print("# Tile classes", len(tile_class))
print("# Divs with class containing tile", len(soup.find_all("div", class_=re.compile("tile"))))
print("Using CSS selector\n", soup.select('div.notile'))
print("Selecting ordered list list items\n", soup.select("ol > li") [:2])
print("Second list item in ordered list", soup.select("ol>li:nth-of-type(2)"))
print("Searching for text string", soup.find_all(text=re.compile("2014")))


First div
 <div class="tile">
<h4>Development</h4>
     0.10.1 - July 2014<br/>
</div>
First div clss ['tile']
First dfn text Quare attende, quaeso.
Link text loripsum.net URL http://loripsum.net/
Link text Poterat autem inpune; URL http://loripsum.net/
Link text Is es profecto tu. URL http://loripsum.net/
0 ['\n', <h4>Development</h4>, '\n     0.10.1 - July 2014', <br/>, '\n']
1 ['\n', <h4>Official Release</h4>, '\n     0.10.0 June 2014', <br/>, '\n']
2 ['\n', <h4>Previous Release</h4>, '\n     0.09.1 June 2013', <br/>, '\n']
Official Version 0.10.0 June 2014
# elements with class 3
# Tile classes 2
# Divs with class containing tile 3
Using CSS selector
 [<div class="notile">
<h4>Previous Release</h4>
     0.09.1 June 2013<br/>
</div>]
Selecting ordered list list items
 [<li>Cur id non ita fit?</li>, <li>In qua si nihil est praeter rationem, sit in una virtute finis bonorum;</li>]
Second list item in ordered list [<li>In qua si nihil est praeter rationem, sit in una virtute finis bono

  print("Searching for text string", soup.find_all(text=re.compile("2014")))
