# LaTeX

## Writing to LaTeX files

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Small 2x2 frame: rows labelled "a"/"b", columns labelled "c"/"d".
df = pd.DataFrame(
    {"c": [1, 3], "d": [2, 4]},
    index=["a", "b"],
)

# Render the frame through its Styler and print the raw LaTeX markup.
latex_table = df.style.to_latex()
print(latex_table)

\begin{tabular}{lrr}
 & c & d \\
a & 1 & 2 \\
b & 3 & 4 \\
\end{tabular}



In [4]:
# Apply the "$ {}" display format to every cell before exporting. The "$" is
# written to the LaTeX output exactly as given (see the printed table below).
dollar_styler = df.style.format("$ {}")
print(dollar_styler.to_latex())

\begin{tabular}{lrr}
 & c & d \\
a & $ 1 & $ 2 \\
b & $ 3 & $ 4 \\
\end{tabular}



# XML

## Reading XML

In [5]:
from io import StringIO

# Sample bookstore document: three <book> elements, each with a `category`
# attribute and <title>, <author>, <year> and <price> children.
xml = """<?xml version="1.0" encoding="UTF-8"?>
<bookstore>
  <book category="cooking">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <price>30.00</price>
  </book>
  <book category="children">
    <title lang="en">Harry Potter</title>
    <author>J K. Rowling</author>
    <year>2005</year>
    <price>29.99</price>
  </book>
  <book category="web">
    <title lang="en">Learning XML</title>
    <author>Erik T. Ray</author>
    <year>2003</year>
    <price>39.95</price>
  </book>
</bookstore>"""

In [6]:
# Wrap the XML text in a file-like buffer and parse it with read_xml's defaults.
xml_buffer = StringIO(xml)
df = pd.read_xml(xml_buffer)

df

Unnamed: 0,category,title,author,year,price
0,cooking,Everyday Italian,Giada De Laurentiis,2005,30.0
1,children,Harry Potter,J K. Rowling,2005,29.99
2,web,Learning XML,Erik T. Ray,2003,39.95


In [7]:
# read_xml also accepts a URL directly; no parsing options are supplied here.
books_url = "https://www.w3schools.com/xml/books.xml"
df = pd.read_xml(books_url)

df

Unnamed: 0,category,title,author,year,price,cover
0,cooking,Everyday Italian,Giada De Laurentiis,2005,30.0,
1,children,Harry Potter,J K. Rowling,2005,29.99,
2,web,XQuery Kick Start,Vaidyanathan Nagarajan,2003,49.99,
3,web,Learning XML,Erik T. Ray,2003,39.95,paperback


In [9]:
# XML from AWS S3 buckets, such as the NIH NCBI PMC Article Datasets providing
# Biomedical and Life Science Journals. Only the journal-meta nodes are selected.
s3_article_uri = "s3://pmc-oa-opendata/oa_comm/xml/all/PMC1236943.xml"
df = pd.read_xml(s3_article_uri, xpath=".//journal-meta")


df

Unnamed: 0,journal-id,journal-title,issn,publisher
0,Cardiovasc Ultrasound,Cardiovascular Ultrasound,1476-7120,


In [11]:
file_path = "books.xml"

# The bookstore text assigned to `xml` earlier declares encoding="UTF-8" in its
# XML header, so write and read the file as UTF-8 explicitly instead of relying
# on the platform's default text encoding.
with open(file_path, "w", encoding="utf-8") as f:
    f.write(xml)


# Read the text back and hand it to read_xml through an in-memory buffer.
with open(file_path, "r", encoding="utf-8") as f:
    df = pd.read_xml(StringIO(f.read()))


df

Unnamed: 0,category,title,author,year,price
0,cooking,Everyday Italian,Giada De Laurentiis,2005,30.0
1,children,Harry Potter,J K. Rowling,2005,29.99
2,web,Learning XML,Erik T. Ray,2003,39.95


In [12]:
# Restrict the parsed rows with an XPath predicate: only <book> nodes whose
# <year> equals 2005 are read from the file.
books_2005_xpath = "//book[year=2005]"
df = pd.read_xml(file_path, xpath=books_2005_xpath)

df

Unnamed: 0,category,title,author,year,price
0,cooking,Everyday Italian,Giada De Laurentiis,2005,30.0
1,children,Harry Potter,J K. Rowling,2005,29.99


In [14]:
# Namespaced sample: every element carries the "doc" prefix, which is bound to
# https://example.com on the root. The circle row has an empty <doc:sides/>.
# NOTE: this rebinds `xml`, replacing any document previously stored under that name.
xml = """<?xml version='1.0' encoding='utf-8'?>
<doc:data xmlns:doc="https://example.com">
  <doc:row>
    <doc:shape>square</doc:shape>
    <doc:degrees>360</doc:degrees>
    <doc:sides>4.0</doc:sides>
  </doc:row>
  <doc:row>
    <doc:shape>circle</doc:shape>
    <doc:degrees>360</doc:degrees>
    <doc:sides/>
  </doc:row>
  <doc:row>
    <doc:shape>triangle</doc:shape>
    <doc:degrees>180</doc:degrees>
    <doc:sides>3.0</doc:sides>
  </doc:row>
</doc:data>"""


# The XPath addresses rows through the "doc" prefix, so that prefix is mapped to
# its URI via `namespaces`.
doc_namespaces = {"doc": "https://example.com"}
df = pd.read_xml(
    StringIO(xml),
    xpath="//doc:row",
    namespaces=doc_namespaces,
)


df

Unnamed: 0,shape,degrees,sides
0,square,360,4.0
1,circle,360,
2,triangle,180,3.0


In [15]:
# Nested sample: each <row> holds a <station> with id/name attributes, a <month>,
# and a <rides> element wrapping three average-ride values.
# NOTE: this rebinds `xml`, replacing any document previously stored under that name.
xml = """<?xml version='1.0' encoding='utf-8'?>
 <response>
  <row>
    <station id="40850" name="Library"/>
    <month>2020-09-01T00:00:00</month>
    <rides>
      <avg_weekday_rides>864.2</avg_weekday_rides>
      <avg_saturday_rides>534</avg_saturday_rides>
      <avg_sunday_holiday_rides>417.2</avg_sunday_holiday_rides>
    </rides>
  </row>
  <row>
    <station id="41700" name="Washington/Wabash"/>
    <month>2020-09-01T00:00:00</month>
    <rides>
      <avg_weekday_rides>2707.4</avg_weekday_rides>
      <avg_saturday_rides>1909.8</avg_saturday_rides>
      <avg_sunday_holiday_rides>1438.6</avg_sunday_holiday_rides>
    </rides>
  </row>
  <row>
    <station id="40380" name="Clark/Lake"/>
    <month>2020-09-01T00:00:00</month>
    <rides>
      <avg_weekday_rides>2949.6</avg_weekday_rides>
      <avg_saturday_rides>1657</avg_saturday_rides>
      <avg_sunday_holiday_rides>1453.8</avg_sunday_holiday_rides>
    </rides>
  </row>
 </response>"""


# XSLT 1.0 stylesheet that flattens each <row>: the station's id and name
# attributes become <station_id>/<station_name> elements, and <month> plus the
# children of <rides> are copied directly under the row.
xsl = """<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
   <xsl:output method="xml" omit-xml-declaration="no" indent="yes"/>
   <xsl:strip-space elements="*"/>
   <xsl:template match="/response">
      <xsl:copy>
        <xsl:apply-templates select="row"/>
      </xsl:copy>
   </xsl:template>
   <xsl:template match="row">
      <xsl:copy>
        <station_id><xsl:value-of select="station/@id"/></station_id>
        <station_name><xsl:value-of select="station/@name"/></station_name>
        <xsl:copy-of select="month|rides/*"/>
      </xsl:copy>
   </xsl:template>
 </xsl:stylesheet>"""


# Flat form of the station document: one <row> per station with station_id,
# station_name, month and the three ride averages as direct children.
# Appears to be kept for reference only (presumably the expected result of the
# XSLT transform); it is not passed to any call in the visible cells.
output = """<?xml version='1.0' encoding='utf-8'?>
 <response>
   <row>
      <station_id>40850</station_id>
      <station_name>Library</station_name>
      <month>2020-09-01T00:00:00</month>
      <avg_weekday_rides>864.2</avg_weekday_rides>
      <avg_saturday_rides>534</avg_saturday_rides>
      <avg_sunday_holiday_rides>417.2</avg_sunday_holiday_rides>
   </row>
   <row>
      <station_id>41700</station_id>
      <station_name>Washington/Wabash</station_name>
      <month>2020-09-01T00:00:00</month>
      <avg_weekday_rides>2707.4</avg_weekday_rides>
      <avg_saturday_rides>1909.8</avg_saturday_rides>
      <avg_sunday_holiday_rides>1438.6</avg_sunday_holiday_rides>
   </row>
   <row>
      <station_id>40380</station_id>
      <station_name>Clark/Lake</station_name>
      <month>2020-09-01T00:00:00</month>
      <avg_weekday_rides>2949.6</avg_weekday_rides>
      <avg_saturday_rides>1657</avg_saturday_rides>
      <avg_sunday_holiday_rides>1453.8</avg_sunday_holiday_rides>
   </row>
 </response>"""


# Pass the XSLT stylesheet to read_xml so the nested document is transformed
# before its rows are parsed into a DataFrame.
rides_buffer = StringIO(xml)
df = pd.read_xml(rides_buffer, stylesheet=xsl)

df

Unnamed: 0,station_id,station_name,month,avg_weekday_rides,avg_saturday_rides,avg_sunday_holiday_rides
0,40850,Library,2020-09-01T00:00:00,864.2,534.0,417.2
1,41700,Washington/Wabash,2020-09-01T00:00:00,2707.4,1909.8,1438.6
2,40380,Clark/Lake,2020-09-01T00:00:00,2949.6,1657.0,1453.8


In [20]:
# Shape records with an extra "type" category; the circle's `sides` entry is NaN.
ext_geom_df = pd.DataFrame(
    dict(
        type=["polygon", "other", "polygon"],
        shape=["square", "circle", "triangle"],
        degrees=[360, 360, 180],
        sides=[4, np.nan, 3],
    )
)

ext_geom_df

Unnamed: 0,type,shape,degrees,sides
0,polygon,square,360,4.0
1,other,circle,360,
2,polygon,triangle,180,3.0


In [21]:
# Pivot to one row per shape with (value, type) column pairs, summing the
# `degrees` and `sides` values within each group.
pvt_df = ext_geom_df.pivot_table(
    values=["degrees", "sides"],
    index="shape",
    columns="type",
    aggfunc="sum",
)

pvt_df

Unnamed: 0_level_0,degrees,degrees,sides,sides
type,other,polygon,other,polygon
shape,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
circle,360.0,,0.0,
square,,360.0,,4.0
triangle,,180.0,,3.0


In [23]:
# Three shapes with a `degrees` value and a `sides` count; the circle's `sides`
# entry is NaN.
geom_df = pd.DataFrame(
    dict(
        shape=["square", "circle", "triangle"],
        degrees=[360, 360, 180],
        sides=[4, np.nan, 3],
    )
)

In [24]:
# Serialize without the XML declaration and without pretty printing, so the
# whole document is emitted on a single line.
compact_xml = geom_df.to_xml(xml_declaration=False, pretty_print=False)
print(compact_xml)

<data><row><index>0</index><shape>square</shape><degrees>360</degrees><sides>4.0</sides></row><row><index>1</index><shape>circle</shape><degrees>360</degrees><sides/></row><row><index>2</index><shape>triangle</shape><degrees>180</degrees><sides>3.0</sides></row></data>


# Excel Files