Skip to content

Commit

Permalink
virtual web servers now use HTTPServer to serve a path instead of proxying …
Browse files Browse the repository at this point in the history
…an external web server

  -- much simpler from WebServer object's perspective (complexity pushed to HTTPServer)
  -- removes the dependency on having an external web server to proxy: this is a big win because
     that dependency greatly complicated how the WebServer node operated, and also required VNS
     to rely on an outside box (harder to configure and set up).
  • Loading branch information
dound committed Jan 24, 2010
1 parent 5871639 commit e171dbd
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 219 deletions.
218 changes: 22 additions & 196 deletions Topology.py
Expand Up @@ -7,10 +7,12 @@
import struct
import time

from settings import ARP_CACHE_TIMEOUT, MAY_FORWARD_TO_PRIVATE_IPS
from settings import ARP_CACHE_TIMEOUT, MAY_FORWARD_TO_PRIVATE_IPS, WEB_SERVER_ROOT_WWW
from HTTPServer import HTTPServer
from LoggingHelper import log_exception, addrstr, pktstr
import ProtocolHelper
from ProtocolHelper import is_http_port
from ProtocolHelper import is_http_port, Packet
from TCPStack import TCPServer
from VNSProtocol import VNSPacket, VNSInterface, VNSHardwareInfo
import web.vnswww.models as db

Expand Down Expand Up @@ -79,7 +81,6 @@ def __init__(self, tid, raw_socket, client_ip, username, start_stats=True):
# read in this topology's nodes
db_nodes = db.Node.objects.filter(template=t.template)
self.gateway = None
self.web_server_ip_addrs = {} # maps IP address to web server node
self.nodes = [self.__make_node(dn, raw_socket) for dn in db_nodes]

# remember the DB to simulator object mapping
Expand Down Expand Up @@ -228,9 +229,6 @@ def handle_incoming_packet(self, packet, rewrite_dst_mac):
first simulated node attached to the gateway."""
gw_intf = self.gw_intf_to_first_hop
if gw_intf:
if self.__is_packet_from_proxy_to_web_server(packet):
return # already handled

self.stats.note_pkt_to_topo()
if rewrite_dst_mac:
if self.is_arp_cache_valid():
Expand All @@ -241,23 +239,6 @@ def handle_incoming_packet(self, packet, rewrite_dst_mac):
else:
gw_intf.link.send_to_other(gw_intf, packet)

def __is_packet_from_proxy_to_web_server(self, packet):
"""Checks to see if this packet is from a proxy to its web server and
should be forwarded directly to it (not through the topology). If so,
it takes care of this and returns True. Otherwise False is returned."""
if len(packet)>=24 and packet[12:14]=='\x08\x00' and packet[23]=='\x06': # TCP/IP?
pkt = ProtocolHelper.Packet(packet)
if pkt.is_valid_tcp() and is_http_port(pkt.tcp_src_port): # from HTTP port?
ws = self.web_server_ip_addrs.get(pkt.ip_dst) # to one of our web servers?
if ws:
ws = WebServer.HTTP_SESSIONS.get(pkt.tcp_dst_port)
if ws:
logging.debug('Gateway forwarding packet directly to %s for handling (from proxy): %s' % (ws.di(), pktstr(packet)))
ws.handle_http_reply(ws.interfaces[0], pkt)
# else: ignore it (old traffic)
return True
return False

def need_arp_translation_for_pkt(self, ethernet_frame):
"""Delays forwarding a packet to the node connected to the gateway until
it replies to an ARP request."""
Expand Down Expand Up @@ -345,8 +326,8 @@ def __make_node(self, dn, raw_socket):
elif dn.type == db.Node.HUB_ID:
return Hub(topo, dn.name)
elif dn.type == db.Node.WEB_SERVER_ID:
hostname = dn.webserver.web_server_addr.get_ascii_hostname()
return WebServer(topo, dn.name, hostname, dn.webserver.replace_hostname_in_http_replies)
path = WEB_SERVER_ROOT_WWW + dn.webserver.path_to_serve.get_ascii_path()
return WebServer(topo, dn.name, path)
elif dn.type == db.Node.GATEWAY_ID:
if self.gateway is not None:
err = 'only one gateway per topology is allowed'
Expand Down Expand Up @@ -700,191 +681,36 @@ class WebServer(BasicNode):
web_server_to_proxy_hostname parameter) on TCP port 80. Like
Host, it also replies to echo and ARP requests. It serves the specified
website by acting as a proxy for that website."""
def __init__(self, topo, name, web_server_to_proxy_hostname, repl_hn_in_replies):
def __init__(self, topo, name, path_to_serve):
BasicNode.__init__(self, topo, name)
self.web_server_to_proxy_hostname = web_server_to_proxy_hostname
self.__init_web_server_ip()
if repl_hn_in_replies:
# match 'a' tags which have a 'href' field containing the hostname
# of the server we're proxying
self.reply_re = re.compile(r'(<a.*?)(href="[^"]*)(%s)([^"]*")' % web_server_to_proxy_hostname)
self.reply_sub_f = None # will be set when add_interface() is first called
else:
self.reply_re = None
self.reply_sub_f = None

self.reply_re_sip = re.compile(r'SENDER__SRC__IP::PORT')

# Each request is from a unique socket (TCP port and IP pair). It is
# then forwarded from a different local TCP port to the web server this
# node is proxying. The request to local port mapping as well as the
# reverse mapping is stored in conns. Keys and values are all raw
# byte-strings in network byte order.
self.conns = {} # (requester IP, TCP port) <=> local TCP port
self.fins = {} # keys = conns key which has sent a FIN

def __init_web_server_ip(self):
"""Resolves the target web server hostname to an IP address."""
try:
self.web_server_to_proxy_ip = inet_aton(self.web_server_to_proxy_hostname) # just a plain IP address
return
except socket.error:
pass # must be a hostname; try to resolve it
try:
str_ip = socket.gethostbyname(self.web_server_to_proxy_hostname)
self.web_server_to_proxy_ip = inet_aton(str_ip)
except socket.gaierror:
self.web_server_to_proxy_ip = None
log_exception(logging.WARN,
'unable to resolve web server hostname: ' + self.web_server_to_proxy_hostname)

def add_interface(self, name, mac, ip, mask):
if self.reply_re and not self.reply_sub_f:
str_ip = inet_ntoa(ip)
len_diff = len(self.web_server_to_proxy_hostname) - len(str_ip)
if len_diff < 0:
logging.error('impossible hostname substitution request - ' + \
'DB should not permit this: hostname=%s (len=%d) < ip=%s (len=%d)' %
(self.web_server_to_proxy_hostname, len(self.web_server_to_proxy_hostname),
str_ip, len(str_ip)))
self.reply_re = None # can't do it
else:
# swap out the hostname for our IP; length MUST be the same to
# make TCP happy, so add spaces before the 'href' field as needed
extra_padding = ' ' * len_diff
self.reply_sub_f = lambda m : m.groups()[0] + extra_padding + m.groups()[1] + str_ip + m.groups()[3]

# tell the topology about this web server's IP
self.topo.web_server_ip_addrs[ip] = self

return BasicNode.add_interface(self, name, mac, ip, mask)
self.http_server = HTTPServer(TCPServer.ANY_PORT, path_to_serve)

@staticmethod
def get_type_str():
return 'Web Server'

def __has_web_server_ip(self):
"""Returns True if the hostname was successfully resolved to an IP."""
return self.web_server_to_proxy_ip is not None

def handle_non_icmp_ip_packet_to_self(self, intf, pkt):
"""If pkt is part of an HTTP exchange on HTTP_PORT, then the packet is
forwarded as appropriate (this node acts as a proxy server) Otherwise,
the default superclass implementation is called."""
if pkt.is_valid_tcp() and self.__has_web_server_ip():
"""If pkt is to an HTTP_PORT, then the packet is handed off to the HTTP
server. Otherwise, the default superclass implementation is called."""
if pkt.is_valid_tcp():
if is_http_port(pkt.tcp_dst_port):
self.handle_http_request(intf, pkt)
return
elif is_http_port(pkt.tcp_src_port):
logging.warning('Did not expect to get an HTTP reply through this path anymore')
self.handle_http_reply(intf, pkt)
return

BasicNode.handle_non_icmp_ip_packet_to_self(self, intf, pkt)

@staticmethod
def __cim(ci, myport):
"""Stringifies a client info 2-tuple and port number belonging to me."""
ip, port = ci
return 'client=%s:%d me=%d' % (inet_ntoa(ip), struct.unpack('>H', port)[0],
struct.unpack('>H', myport)[0])

NEXT_TCP_PORT = 10000
HTTP_SESSIONS = {}

@staticmethod
def get_and_advance_tcp_port():
ret = WebServer.NEXT_TCP_PORT
WebServer.NEXT_TCP_PORT += 1
if WebServer.NEXT_TCP_PORT > 65535:
WebServer.NEXT_TCP_PORT = 10000
return ret

def handle_http_request(self, intf, pkt):
"""Forward the received packet from an HTTP client to the web server."""
# see if we are already working with this connection
client_info = (pkt.ip_src, pkt.tcp_src_port)
my_port = self.conns.get(client_info)
if my_port is None:
# new connection: allocate a port for it
my_port = struct.pack('> H', WebServer.get_and_advance_tcp_port())
WebServer.HTTP_SESSIONS[my_port] = self
self.conns[client_info] = my_port
self.conns[my_port] = client_info
logging.debug('%s forwarding new HTTP request: %s' %
(self.di(), self.__cim(client_info, my_port)))
else:
logging.debug('%s forwarding ongoing HTTP request: %s' %
(self.di(), self.__cim(client_info, my_port)))

# rewrite and forward the request to the web server we're proxying
new_dst = self.web_server_to_proxy_ip
new_packet = pkt.modify_tcp_packet(intf.ip, my_port,
new_dst, pkt.tcp_dst_port,
reverse_eth=True)

# send it directly - the router on the topology shouldn't even know
logging.debug('%s sending packet out to the real world directly (to proxy): %s' % (self.di(), pktstr(new_packet)))
self.topo.send_packet_to_gateway(new_packet)

self.__check_for_teardown(pkt, client_info, my_port)

def handle_http_reply(self, intf, pkt):
"""Forward the received packet from the web server to the HTTP client."""
if pkt.ip_src != self.web_server_to_proxy_ip:
logging.debug('%s ignoring HTTP reply from unexpected source %s' % (self.di(), addrstr(pkt.ip_dst)))
return # ignore HTTP replies unless they're from our web server

client_info = self.conns.get(pkt.tcp_dst_port)
if client_info is None:
logging.debug('%s ignoring unexpected HTTP reply to my port %s' % (self.di(), struct.unpack('>H',pkt.tcp_dst_port)[0]))
return # ignore unexpected replies
logging.debug('%s forwarding HTTP reply to client from me %s' % (self.di(), self.__cim(client_info, pkt.tcp_dst_port)))

if self.reply_sub_f:
sz = len(pkt.tcp_data)
pkt.tcp_data = self.reply_re.sub(self.reply_sub_f, pkt.tcp_data)

if self.reply_re_sip:
src_ip = inet_ntoa(client_info[0])
src_port = struct.unpack('>H', client_info[1])[0]
sip_repl_txt = '%15s:%-5s' % (src_ip, src_port)
sz_before = len(pkt.tcp_data)
pkt.tcp_data = self.reply_re_sip.sub(sip_repl_txt, pkt.tcp_data)
sz_after = len(pkt.tcp_data)
logging.debug('size was %d then %d' % (sz_before, sz_after))

# rewrite and forward the reply back to the client its associated with
(client_ip, client_tcp_port) = client_info
new_packet = pkt.modify_tcp_packet(intf.ip, pkt.tcp_src_port,
client_ip, client_tcp_port,
reverse_eth=True)
intf.link.send_to_other(intf, new_packet)

self.__check_for_teardown(pkt, pkt.tcp_dst_port, client_info)

def __check_for_teardown(self, pkt, side_from, other_side):
"""Checks to see if a TCP RST or the final FIN has been received from
side_from and handles them appropriately if so."""
if pkt.is_tcp_rst() or self.__is_full_close(pkt, side_from, other_side):
del self.conns[side_from]
del self.conns[other_side]
WebServer.HTTP_SESSIONS.pop(side_from, None)
WebServer.HTTP_SESSIONS.pop(other_side, None)
self.fins.pop(other_side, None)
logging.debug('%s HTTP connection state removed (RST or final FIN)' % self.di())

def __is_full_close(self, pkt, side_from, other_side):
"""Checks to see if pkt from side_from is a FIN. Returns True if
other_side has already sent a FIN. Otherwise returns False."""
if not pkt.is_tcp_fin():
return False
elif self.fins.has_key(other_side):
return True
else:
self.fins[side_from] = True # cleaned up by __check_for_teardown
return False
tcp_conn = self.http_server.handle_tcp(pkt)
if tcp_conn:
tcp_pts = tcp_conn.get_packets_to_send()
if tcp_pts:
for tcp, data in tcp_pts:
eth = pkt.get_reversed_eth()
ip = pkt.get_reversed_ip(new_ttl=64, new_tlen=pkt.ip_hlen+len(tcp)+len(data))
pkt_out = eth + ip + Packet.cksum_tcp_hdr(ip, tcp, data) + data
logging.debug('%s sending packet from HTTP server: %s' % (self, pktstr(pkt_out)))
intf.link.send_to_other(intf, pkt_out)

def __str__(self):
ps = ' proxying={%s->%s}' % (self.web_server_to_proxy_hostname, addrstr(self.web_server_to_proxy_ip))
ps = ' serving:%s' % self.http_server.get_path_being_served()
return BasicNode.__str__(self) + ps
3 changes: 3 additions & 0 deletions settings.py
Expand Up @@ -40,3 +40,6 @@

# Max time (in seconds) to keep an idle topology. (0 = no max)
MAX_INACTIVE_TOPOLOGY_LIFE_SEC = 900

# For VNS web server nodes: root folder which they will serve from
WEB_SERVER_ROOT_WWW = './vns_web_server_www/'
16 changes: 8 additions & 8 deletions web/vnswww/admin.py
@@ -1,6 +1,6 @@
from django.contrib import admin
from models import Simulator, Organization, UserProfile, StatsTopology, \
TopologyTemplate, Node, WebServer, WebServerHostname, Port, Link, \
TopologyTemplate, Node, WebServer, WebServerPath, Port, Link, \
Topology, TopologySourceIPFilter, TopologyUserFilter, \
IPAssignment, MACAssignment, IPBlock, IPBlockAllocation, \
RecentIPBlockAllocation, SystemInfo
Expand Down Expand Up @@ -35,14 +35,14 @@ class NodeAdmin(admin.ModelAdmin):
search_fields = ('template__name', 'name', 'type__name')

class WebServerAdmin(admin.ModelAdmin):
list_display = ('template', 'name', 'type', 'web_server_addr', 'replace_hostname_in_http_replies')
list_display = ('template', 'name', 'type', 'path_to_serve')
ordering = ('template', 'name')
search_fields = ('template__name', 'name', 'type__name', 'web_server_addr')
search_fields = ('template__name', 'name', 'type__name', 'path_to_serve')

class WebServerHostnameAdmin(admin.ModelAdmin):
list_display = ('hostname',)
ordering = ('hostname',)
search_fields = ('hostname',)
class WebServerPathAdmin(admin.ModelAdmin):
    """Admin options for the model registered with this class: the single
    'path' column is listed, used for ordering, and searchable."""
    search_fields = ('path',)
    ordering = ('path',)
    list_display = ('path',)

class PortAdmin(admin.ModelAdmin):
list_display = ('node', 'name', 'ip_offset')
Expand Down Expand Up @@ -111,7 +111,7 @@ class SystemInfoAdmin(admin.ModelAdmin):
admin.site.register(TopologyTemplate, TopologyTemplateAdmin)
admin.site.register(Node, NodeAdmin)
admin.site.register(WebServer, WebServerAdmin)
admin.site.register(WebServerHostname, WebServerHostnameAdmin)
admin.site.register(WebServerPath, WebServerPathAdmin)
admin.site.register(Port, PortAdmin)
admin.site.register(Link, LinkAdmin)
admin.site.register(Topology, TopologyAdmin)
Expand Down
35 changes: 20 additions & 15 deletions web/vnswww/models.py
Expand Up @@ -211,31 +211,36 @@ class Node(Model):
def __unicode__(self):
return u'%s: %s' % (self.template.name, self.name)

class WebServerHostname(Model):
"""A web server hostname which can be proxied by a simulated web server."""
hostname = CharField(max_length=256)

def get_ascii_hostname(self):
return self.hostname.encode('ascii')
class WebServerPath(Model):
"""A path which a web server can serve."""
# Path which a simulated web server will serve.  The topology code builds the
# served location as WEB_SERVER_ROOT_WWW + this path, so the help text names
# that setting (it previously referred to "APP_SERVER_ROOT_WWW", a name which
# does not match the setting introduced alongside this model).
path = CharField(max_length=512,
                 help_text='This path will be relative to WEB_SERVER_ROOT_WWW '
                           'folder in the VNS root folder.')

# Matches two consecutive periods anywhere in a path; clean() rejects any
# path in which this pattern is found.
RE_TWO_PERIODS = re.compile(r'[.][.]')
# Whitelist for a whole path: letters, digits, '-', '_', '.' and '/', where
# the first character may be anything in that set except '/'.
RE_OK_PATH = re.compile(r'^[-A-Za-z0-9_.][-A-Za-z0-9_./]*$')
def clean(self):
    """Validate this object's path.

    Raises ValidationError if the path contains two adjacent periods, or if
    it does not match the RE_OK_PATH whitelist (allowed characters only and
    no leading slash).  The adjacent-periods check is applied first.
    """
    # imported here, as in the original, rather than at module level
    from django.core.exceptions import ValidationError

    candidate = self.path
    if WebServerPath.RE_TWO_PERIODS.search(candidate):
        raise ValidationError('path may not contain two periods next to one another')

    path_is_ok = WebServerPath.RE_OK_PATH.match(candidate)
    if not path_is_ok:
        raise ValidationError('path must only contain letters, numbers, dashes, underscores, periods, and slashes and must not start with a slash.')

def get_ascii_path(self):
    """Return this object's path encoded as ASCII.

    A path containing non-ASCII characters makes the encode call raise.
    """
    ascii_path = self.path.encode('ascii')
    return ascii_path

def __unicode__(self):
return self.hostname
return self.path

class WebServer(Node):
"""A web server node. It specifies which web server it will proxy (i.e.,
if you connect to it, what website will it appear to serve). This is
limited to choices in the WebServerHostname table to prevent users from
using the system to retrieve content from questionable sources."""
web_server_addr = ForeignKey(WebServerHostname)
replace_hostname_in_http_replies = \
BooleanField(default=True,
help_text='If true, then HTTP replies will have any ' + \
'occurrence of the hostname within the "href"' + \
'field of the "a" tag replaced with this ' + \
'node\'s IP address.')
path_to_serve = ForeignKey(WebServerPath)

def __unicode__(self):
return Node.__unicode__(self) + ' -> %s' % self.web_server_addr.__unicode__()
return Node.__unicode__(self) + ' -> %s' % self.path_to_serve.__unicode__()

class PortTreeNode():
"""A node in a tree of ports."""
Expand Down

0 comments on commit e171dbd

Please sign in to comment.