# Strings and Stuff  in Python

In [None]:
import numpy as np

## Strings are just arrays of characters

In [None]:
s = 'spam'

s,len(s),s[0],s[0:2]

In [None]:
s[::-1]

### Arithmetic with Strings

In [None]:
s = 'spam'
e = "eggs"

s + e

In [None]:
s + " " + e

In [None]:
4 * (s + " ") + e

In [None]:
print(4 * (s + " ") + s + " and\n" + e)     # use \n to get a newline with the print function

### You can compare strings

In [None]:
"spam" == "good"

In [None]:
"spam" != "good"

In [None]:
"spam" == "spam"

In [None]:
"sp" < "spam"

In [None]:
"spam" < "eggs"

## Python supports `Unicode` characters

You can enter `unicode` characters directly from the keyboard (depends on your operating system), or you can use the character's Unicode code point in an escape sequence.

A list of Unicode characters and their code points can be found [here](https://en.wikipedia.org/wiki/List_of_Unicode_characters).

For example, the Unicode code point for the Greek capital omega is `U+03A9`, so you can create the character with the escape sequence `\U000003A9`.

In [None]:
print("This resistor has a value of 100 k\U000003A9")

In [None]:
Ω = 1e3

Ω + np.pi

### [Emoji](https://en.wikipedia.org/wiki/Emoji) are unicode characters, so you can use them as well (not all OSs will show all characters!)

In [None]:
radio_active = "\U00002622"
wink = "\U0001F609"

print(radio_active + wink)

### Emoji can not be used as variable names (at least not yet ...)

In [None]:
☢ = 2.345

☢ ** 2    

### Watch out for variable types! 

In [None]:
n = 4

print("I would like " + n + " orders of spam")

In [None]:
print("I would like " + str(n) + " orders of spam")

## Use explicit formatting to avoid these errors

### Python string formatting has the form:

`{Variable Index: Format Type}  .format(Variable)`

In [None]:
A = 42
B = 1.23456
C = 1.23456e10
D = 'Forty Two'

In [None]:
"I like the number {0:d}".format(A)

In [None]:
"I like the number {0:s}".format(D)

In [None]:
"The number {0:f} is fine, but not a cool as {1:d}".format(B,A)

In [None]:
"The number {0:.3f} is fine, but not a cool as {1:d}".format(C,A)       # 3 places after decimal

In [None]:
"The number {0:.3e} is fine, but not a cool as {1:d}".format(C,A)       # sci notation

In [None]:
"{0:g} and {1:g} are the same format but different results".format(B,C)

### Nice trick to convert number to a different base

In [None]:
"Representation of the number {1:s} - dec: {0:d};  hex: {0:x};  oct: {0:o};  bin: {0:b}".format(A,D)

## Formatting is way better than piecing strings together

In [None]:
import pandas as pd

In [None]:
planet_table = pd.read_csv('Planets.csv')

In [None]:
# For every planet name, look up its 'a' value, label it "Inner" when that
# value is below 3.0 and "Outer" otherwise, then print one formatted sentence.
for row, planet in enumerate(planet_table['Name']):

    distance = planet_table['a'][row]
    region = "Inner" if distance < 3.0 else "Outer"

    print("The planet {0:s}, at a distance of {1:.1f} AU, is in the {2:s} solar system"
          .format(planet, distance, region))

### Really long strings

In [None]:
# A triple-quoted literal may span several lines; every line break inside the
# quotes (including the one right after the opening quotes) is part of the
# string. The placeholders are filled from the 'Name' and 'a' columns of
# planet_table at index labels 1 and 4.
long_string = (
"""
The planets {0:s} and {1:s} are at a distance
of {2:.1f} AU and {3:.1f} AU from the Sun.
"""
.format(planet_table['Name'][1],planet_table['Name'][4],
        planet_table['a'][1],planet_table['a'][4])
)

In [None]:
print(long_string)

### You can also use the `textwrap` module

In [None]:
import textwrap

In [None]:
lots_of_spam = (s + " ") * 100

In [None]:
print(lots_of_spam)

In [None]:
textwrap.wrap(lots_of_spam, width=70)

##  Working with strings

In [None]:
line = "My hovercraft is full of eels"

### Find and Replace

In [None]:
line.replace('eels', 'wheels')

### Justification and Cleaning

In [None]:
line.center(100)

In [None]:
line.ljust(100)

In [None]:
line.rjust(100, "*")

In [None]:
line2 = "            My hovercraft is full of eels      "

In [None]:
line2.strip()

In [None]:
line3 = "*$*$*$*$*$*$*$*$My hovercraft is full of eels*$*$*$*$"

In [None]:
line3.strip('*$')

In [None]:
line3.lstrip('*$'), line3.rstrip('*$')

### Splitting and Joining

In [None]:
line.split()

In [None]:
'_*_'.join(line.split())

In [None]:
' '.join(line.split()[::-1])

### Line Formatting

In [None]:
anotherline = "mY hoVErCRaft iS fUlL oF eEELS"

In [None]:
anotherline.upper()

In [None]:
anotherline.lower()

In [None]:
anotherline.title()

In [None]:
anotherline.capitalize()

In [None]:
anotherline.swapcase()

# Regular Expressions in Python (`re`)

In [None]:
import re

In [None]:
myline = "This is a test, this in only a test."

In [None]:
print(myline)

Raw strings begin with a special prefix (`r`) and signal Python not to interpret backslashes and other special metacharacters in the string, allowing you to pass them through directly to the regular expression engine.

In [None]:
regex1 = r"test"

In [None]:
match1 = re.search(regex1, myline)

In [None]:
match1

In [None]:
myline[10:14]

In [None]:
match3 = re.findall(regex1, myline)

In [None]:
match3

#### One of the useful things about regular expressions in Python is using them to search and replace parts of a string (`re.sub`)

In [None]:
mynewline = re.sub(regex1, "*TEST*", myline)

In [None]:
mynewline

## RegEx Golf!

In [None]:
# Read the whole file and split it into a list of lines (line endings removed).
# The `with` block closes the file handle as soon as the read has finished,
# instead of leaving it open until the garbage collector gets to it.
with open("./GOLF/golf_00") as golf_handle:
    golf_file = golf_handle.read().splitlines()

In [None]:
golf_file

In [None]:
for i in golf_file:
    print(i)

In [None]:
def regex_test_list(mylist, myregex):
    """Print every string in `mylist` tagged with whether `myregex` matches it.

    Each entry is searched with `re.search`, so the pattern may match anywhere
    in the string. The entry is printed followed by " YES" when a match is
    found and " NOPE" when it is not. Nothing is returned.
    """
    for entry in mylist:
        verdict = " YES" if re.search(myregex, entry) else " NOPE"
        print(entry + verdict)

In [None]:
regex = r"one"

In [None]:
regex_test_list(golf_file, regex)

In [None]:
regex = r"t|n"

In [None]:
regex_test_list(golf_file, regex)

# Working with Files and Directories (OS agnostic)
- The [`os`](https://docs.python.org/3.4/library/os.html) module allows you to do operating system stuff without worrying about what system you are using

In [None]:
import os

In [None]:
os.chdir("./MyData")

In [None]:
my_data_dir = os.listdir()

In [None]:
my_data_dir

In [None]:
# Print every entry of the directory listing whose name ends in ".txt".
for entry in my_data_dir:

    if not entry.endswith(".txt"):
        continue

    print(entry)

In [None]:
# Print the absolute path of every directory entry whose name ends in ".txt".
txt_entries = (name for name in my_data_dir if name.endswith(".txt"))

for name in txt_entries:
    print(os.path.abspath(name))

### You can also find files with [`glob`](https://docs.python.org/3.4/library/glob.html)

In [None]:
import glob

In [None]:
my_files = glob.glob('02_*.fits')

In [None]:
my_files

In [None]:
# Report the size of every file matched by the glob pattern.
for file in my_files:

    # st_size is the file size in bytes as reported by os.stat
    file_size = os.stat(file).st_size
    # Message previously read "as a size of" -- corrected to "has a size of".
    out_string = "The file {0} has a size of {1} bytes".format(file, file_size)

    print(out_string)